14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin /*
244887Schin * regcmp implementation
254887Schin */
264887Schin
274887Schin #include <ast.h>
284887Schin #include <libgen.h>
294887Schin #include <regex.h>
304887Schin #include <align.h>
314887Schin
/*
 * sizing constants
 *
 *	INC	number of bytes added to the handle allocation on each
 *		regcmp() compile attempt
 *	TOT	regcmp() gives up once the allocation would exceed this
 *	SUB	number of $n extraction slots ($0 through $9)
 */

#define INC		(2 * 1024)
#define TOT		(16 * 1024)
#define SUB		10
354887Schin
364887Schin typedef struct
374887Schin {
384887Schin char* cur;
394887Schin regex_t re;
404887Schin unsigned char sub[SUB];
414887Schin int nsub;
424887Schin size_t size;
434887Schin char buf[ALIGN_BOUND2];
444887Schin } Regex_t;
454887Schin
/*
 * __loc1 is assigned by regex() on every successful match and then
 * points at the first matched character of the subject
 * NOTE(review): __DEFINE__ is declared outside this file; presumably
 * it expands to an exported definition of `char* __loc1 = 0' -- confirm
 */

__DEFINE__(char*, __loc1, 0);
474887Schin
484887Schin static void*
block(void * handle,void * data,size_t size)494887Schin block(void* handle, void* data, size_t size)
504887Schin {
514887Schin register Regex_t* re = (Regex_t*)handle;
524887Schin
534887Schin if (data || (size = roundof(size, ALIGN_BOUND2)) > (re->buf + re->size - re->cur))
544887Schin return 0;
554887Schin data = (void*)re->cur;
564887Schin re->cur += size;
574887Schin return data;
584887Schin }
594887Schin
604887Schin char*
regcmp(const char * pattern,...)614887Schin regcmp(const char* pattern, ...)
624887Schin {
634887Schin register char* s;
644887Schin register Regex_t* re;
654887Schin register size_t n;
664887Schin register int c;
674887Schin register int p;
684887Schin int b;
694887Schin int i;
704887Schin int j;
714887Schin int nsub;
724887Schin register Sfio_t* sp;
734887Schin unsigned char paren[128];
744887Schin unsigned char sub[SUB];
754887Schin va_list ap;
764887Schin
774887Schin va_start(ap, pattern);
784887Schin if (!pattern || !*pattern || !(sp = sfstropen()))
794887Schin return 0;
804887Schin memset(paren, 0, sizeof(paren));
814887Schin n = 0;
824887Schin p = -1;
834887Schin b = 0;
844887Schin nsub = 0;
854887Schin s = (char*)pattern;
864887Schin do
874887Schin {
884887Schin while (c = *s++)
894887Schin {
904887Schin if (c == '\\')
914887Schin {
924887Schin sfputc(sp, c);
934887Schin if (!(c = *s++))
944887Schin break;
954887Schin }
964887Schin else if (b)
974887Schin {
984887Schin if (c == ']')
994887Schin b = 0;
1004887Schin }
1014887Schin else if (c == '[')
1024887Schin {
1034887Schin b = 1;
1044887Schin if (*s == '^')
1054887Schin {
1064887Schin sfputc(sp, c);
1074887Schin c = *s++;
1084887Schin }
1094887Schin if (*s == ']')
1104887Schin {
1114887Schin sfputc(sp, c);
1124887Schin c = *s++;
1134887Schin }
1144887Schin }
1154887Schin else if (c == '(')
1164887Schin {
1174887Schin /*
1184887Schin * someone explain in one sentence why
1194887Schin * a cast is needed to make this work
1204887Schin */
1214887Schin
1224887Schin if (p < (int)(elementsof(paren) - 1))
1234887Schin p++;
1244887Schin paren[p] = ++n;
1254887Schin }
1264887Schin else if (c == ')' && p >= 0)
1274887Schin {
1284887Schin for (i = p; i > 0; i--)
1294887Schin if (paren[i])
1304887Schin break;
1314887Schin if (*s == '$' && (j = *(s + 1)) >= '0' && j <= '9')
1324887Schin {
1334887Schin s += 2;
1344887Schin j -= '0';
1354887Schin if (nsub <= j)
1364887Schin {
1374887Schin if (!nsub)
1384887Schin memset(sub, 0, sizeof(sub));
1394887Schin nsub = j + 1;
1404887Schin }
1414887Schin sub[j] = paren[i] + 1;
1424887Schin }
1434887Schin paren[i] = 0;
1444887Schin }
1454887Schin sfputc(sp, c);
1464887Schin }
1474887Schin } while (s = va_arg(ap, char*));
1484887Schin va_end(ap);
1494887Schin if (!(s = sfstruse(sp)))
1504887Schin {
1514887Schin sfstrclose(sp);
1524887Schin return 0;
1534887Schin }
1544887Schin re = 0;
1554887Schin n = 0;
1564887Schin do
1574887Schin {
1584887Schin if ((n += INC) > TOT || !(re = newof(re, Regex_t, 0, n)))
1594887Schin {
1604887Schin if (re)
1614887Schin free(re);
1624887Schin sfstrclose(sp);
1634887Schin return 0;
1644887Schin }
1654887Schin re->cur = re->buf;
1664887Schin re->size = n + ALIGN_BOUND2 - sizeof(Regex_t);
1674887Schin regalloc(re, block, REG_NOFREE);
1684887Schin c = regcomp(&re->re, s, REG_EXTENDED|REG_LENIENT|REG_NULL);
1694887Schin regalloc(NiL, NiL, 0);
1704887Schin } while (c == REG_ESPACE);
1714887Schin sfstrclose(sp);
1724887Schin if (c)
1734887Schin {
1744887Schin free(re);
1754887Schin return 0;
1764887Schin }
1774887Schin if (re->nsub = nsub)
1784887Schin memcpy(re->sub, sub, (nsub + 1) * sizeof(sub[0]));
1794887Schin return (char*)re;
1804887Schin }
1814887Schin
1824887Schin char*
regex(const char * handle,const char * subject,...)1834887Schin regex(const char* handle, const char* subject, ...)
1844887Schin {
1854887Schin register Regex_t* re;
1864887Schin register int n;
1874887Schin register int i;
1884887Schin register int k;
1894887Schin char* sub[SUB + 1];
1904887Schin regmatch_t match[SUB + 1];
1914887Schin va_list ap;
1924887Schin
1934887Schin va_start(ap, subject);
1944887Schin if (!(re = (Regex_t*)handle) || !subject)
1954887Schin return 0;
1964887Schin for (n = 0; n < re->nsub; n++)
1974887Schin sub[n] = va_arg(ap, char*);
1984887Schin va_end(ap);
1994887Schin if (regexec(&re->re, subject, SUB + 1, match, 0))
2004887Schin return 0;
2014887Schin for (n = 0; n < re->nsub; n++)
2024887Schin if (i = re->sub[n])
2034887Schin {
2044887Schin i--;
2054887Schin k = match[i].rm_eo - match[i].rm_so;
2064887Schin strncpy(sub[n], subject + match[i].rm_so, k);
2074887Schin *(sub[n] + k) = 0;
2084887Schin }
2094887Schin __loc1 = (char*)subject + match[0].rm_so;
2104887Schin return (char*)subject + match[0].rm_eo;
2114887Schin }
212