14887Schin /***********************************************************************
24887Schin * *
34887Schin * This software is part of the ast package *
4*12068SRoger.Faulkner@Oracle.COM * Copyright (c) 1985-2010 AT&T Intellectual Property *
54887Schin * and is licensed under the *
64887Schin * Common Public License, Version 1.0 *
78462SApril.Chin@Sun.COM * by AT&T Intellectual Property *
84887Schin * *
94887Schin * A copy of the License is available at *
104887Schin * http://www.opensource.org/licenses/cpl1.0.txt *
114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
124887Schin * *
134887Schin * Information and Software Systems Research *
144887Schin * AT&T Research *
154887Schin * Florham Park NJ *
164887Schin * *
174887Schin * Glenn Fowler <gsf@research.att.com> *
184887Schin * David Korn <dgk@research.att.com> *
194887Schin * Phong Vo <kpv@research.att.com> *
204887Schin * *
214887Schin ***********************************************************************/
224887Schin #pragma prototyped
234887Schin /*
244887Schin * RE character class support
254887Schin */
264887Schin
274887Schin #include "reglib.h"
284887Schin
294887Schin struct Ctype_s; typedef struct Ctype_s Ctype_t;
304887Schin
314887Schin struct Ctype_s
324887Schin {
334887Schin const char* name;
344887Schin size_t size;
354887Schin regclass_t ctype;
364887Schin Ctype_t* next;
374887Schin #if _lib_wctype
384887Schin wctype_t wtype;
394887Schin #endif
404887Schin };
414887Schin
424887Schin static Ctype_t* ctypes;
434887Schin
/*
 * CTYPES is the number of named classes at the front of ctype[]
 * (alnum, alpha, blank, cntrl, digit, graph, lower, print, punct,
 * space, upper, word, xdigit); the Is_wc_N() helpers address the
 * wctype() slots that follow as ctype[CTYPES+N-1], so this value
 * must equal that count exactly
 * WTYPES is the number of wctype() slots (0 without _lib_wctype)
 */

#define CTYPES		13
#if _lib_wctype
#define WTYPES		8
#else
#define WTYPES		0
#endif
504887Schin
514887Schin /*
524887Schin * this stuff gets around posix failure to define isblank,
534887Schin * and the fact that ctype functions are macros
544887Schin * and any local extensions that may not even have functions or macros
554887Schin */
564887Schin
574887Schin #if _need_iswblank
584887Schin
/*
 * iswblank() stand-in: test wc against the "blank" wctype()
 * the wctype() handle is looked up on the first call only and
 * kept in a function-local static for later calls
 */

int
_reg_iswblank(wint_t wc)
{
	static wctype_t	blank;
	static int	cached;

	if (cached == 0)
	{
		cached = 1;
		blank = wctype("blank");
	}
	return iswctype(wc, blank);
}
724887Schin
734887Schin #endif
744887Schin
/*
 * one real int(int) function per class, each forwarding to the
 * matching isw*() classifier, so that the address of every class
 * test can be stored in a table or returned to a caller
 * the Not*() variants return the logical negation (0 or 1)
 */

static int
Isalnum(int c)
{
	return iswalnum(c);
}

static int
Isalpha(int c)
{
	return iswalpha(c);
}

static int
Isblank(int c)
{
	return iswblank(c);
}

static int
Iscntrl(int c)
{
	return iswcntrl(c);
}

static int
Isdigit(int c)
{
	return iswdigit(c);
}

static int
Notdigit(int c)
{
	return iswdigit(c) == 0;
}

static int
Isgraph(int c)
{
	return iswgraph(c);
}

static int
Islower(int c)
{
	return iswlower(c);
}

static int
Isprint(int c)
{
	return iswprint(c);
}

static int
Ispunct(int c)
{
	return iswpunct(c);
}

static int
Isspace(int c)
{
	return iswspace(c);
}

static int
Notspace(int c)
{
	return iswspace(c) == 0;
}

static int
Isupper(int c)
{
	return iswupper(c);
}

/* word characters are the alphanumerics plus underscore */

static int
Isword(int c)
{
	return c == '_' || iswalnum(c);
}

static int
Notword(int c)
{
	return !(c == '_' || iswalnum(c));
}

static int
Isxdigit(int c)
{
	return iswxdigit(c);
}
914887Schin
#if _lib_wctype

/*
 * forward declarations for the classifiers stored in the unnamed
 * ctype[] slots; the definitions come after the table because each
 * one reads a wtype member out of ctype[]
 */

static int	Is_wc_1(int c);
static int	Is_wc_2(int c);
static int	Is_wc_3(int c);
static int	Is_wc_4(int c);
static int	Is_wc_5(int c);
static int	Is_wc_6(int c);
static int	Is_wc_7(int c);
static int	Is_wc_8(int c);

#endif
1044887Schin
1054887Schin #define SZ(s) s,(sizeof(s)-1)
1064887Schin
1074887Schin static Ctype_t ctype[] =
1084887Schin {
1094887Schin { SZ("alnum"), Isalnum },
1104887Schin { SZ("alpha"), Isalpha },
1114887Schin { SZ("blank"), Isblank },
1124887Schin { SZ("cntrl"), Iscntrl },
1134887Schin { SZ("digit"), Isdigit },
1144887Schin { SZ("graph"), Isgraph },
1154887Schin { SZ("lower"), Islower },
1164887Schin { SZ("print"), Isprint },
1174887Schin { SZ("punct"), Ispunct },
1184887Schin { SZ("space"), Isspace },
1194887Schin { SZ("upper"), Isupper },
1204887Schin { SZ("word"), Isword },
1214887Schin { SZ("xdigit"),Isxdigit},
1224887Schin #if _lib_wctype
1234887Schin { 0, 0, Is_wc_1 },
1244887Schin { 0, 0, Is_wc_2 },
1254887Schin { 0, 0, Is_wc_3 },
1264887Schin { 0, 0, Is_wc_4 },
1274887Schin { 0, 0, Is_wc_5 },
1284887Schin { 0, 0, Is_wc_6 },
1294887Schin { 0, 0, Is_wc_7 },
1304887Schin { 0, 0, Is_wc_8 },
1314887Schin #endif
1324887Schin };
1334887Schin
#if _lib_wctype

/*
 * classifiers for the wctype() slots of ctype[]: Is_wc_N tests c
 * with iswctype() against the wtype held in ctype[CTYPES+N-1]
 * (regclass() stores that wtype when it claims a slot)
 *
 * NOTE(review): the index assumes CTYPES equals the number of named
 * entries that precede the wctype() slots in ctype[] -- confirm
 */

static int
Is_wc_1(int c)
{
	return iswctype(c, ctype[CTYPES + 0].wtype);
}

static int
Is_wc_2(int c)
{
	return iswctype(c, ctype[CTYPES + 1].wtype);
}

static int
Is_wc_3(int c)
{
	return iswctype(c, ctype[CTYPES + 2].wtype);
}

static int
Is_wc_4(int c)
{
	return iswctype(c, ctype[CTYPES + 3].wtype);
}

static int
Is_wc_5(int c)
{
	return iswctype(c, ctype[CTYPES + 4].wtype);
}

static int
Is_wc_6(int c)
{
	return iswctype(c, ctype[CTYPES + 5].wtype);
}

static int
Is_wc_7(int c)
{
	return iswctype(c, ctype[CTYPES + 6].wtype);
}

static int
Is_wc_8(int c)
{
	return iswctype(c, ctype[CTYPES + 7].wtype);
}

#endif
1464887Schin
1474887Schin /*
1484887Schin * return pointer to ctype function for :class:] in s
1494887Schin * s points to the first char after the initial [
1504887Schin * if e!=0 it points to next char in s
1514887Schin * 0 returned on error
1524887Schin */
1534887Schin
1544887Schin regclass_t
regclass(const char * s,char ** e)1554887Schin regclass(const char* s, char** e)
1564887Schin {
1574887Schin register Ctype_t* cp;
1584887Schin register int c;
1594887Schin register size_t n;
1604887Schin register const char* t;
1614887Schin
1624887Schin if (c = *s++)
1634887Schin {
1644887Schin for (t = s; *t && (*t != c || *(t + 1) != ']'); t++);
1654887Schin if (*t != c)
1664887Schin return 0;
1674887Schin n = t - s;
1684887Schin for (cp = ctypes; cp; cp = cp->next)
1694887Schin if (n == cp->size && strneq(s, cp->name, n))
1704887Schin goto found;
1714887Schin for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++)
1724887Schin {
1734887Schin #if _lib_wctype
1744887Schin if (!cp->size && (cp->name = (const char*)memdup(s, n + 1)))
1754887Schin {
1764887Schin *((char*)cp->name + n) = 0;
1774887Schin /* mvs.390 needs the (char*) cast -- barf */
1784887Schin if (!(cp->wtype = wctype((char*)cp->name)))
1794887Schin {
1804887Schin free((char*)cp->name);
1814887Schin return 0;
1824887Schin }
1834887Schin cp->size = n;
1844887Schin goto found;
1854887Schin }
1864887Schin #endif
1874887Schin if (n == cp->size && strneq(s, cp->name, n))
1884887Schin goto found;
1894887Schin }
1904887Schin }
1914887Schin return 0;
1924887Schin found:
1934887Schin if (e)
1944887Schin *e = (char*)t + 2;
1954887Schin return cp->ctype;
1964887Schin }
1974887Schin
1984887Schin /*
1994887Schin * associate the ctype function fun with name
2004887Schin */
2014887Schin
2024887Schin int
regaddclass(const char * name,regclass_t fun)2034887Schin regaddclass(const char* name, regclass_t fun)
2044887Schin {
2054887Schin register Ctype_t* cp;
2064887Schin register Ctype_t* np;
2074887Schin register size_t n;
2084887Schin
2094887Schin n = strlen(name);
2104887Schin for (cp = ctypes; cp; cp = cp->next)
2114887Schin if (cp->size == n && strneq(name, cp->name, n))
2124887Schin {
2134887Schin cp->ctype = fun;
2144887Schin return 0;
2154887Schin }
2164887Schin if (!(np = newof(0, Ctype_t, 1, n + 1)))
2174887Schin return REG_ESPACE;
2184887Schin np->size = n;
2194887Schin np->name = strcpy((char*)(np + 1), name);
2204887Schin np->ctype = fun;
2214887Schin np->next = ctypes;
2224887Schin ctypes = np;
2234887Schin return 0;
2244887Schin }
2254887Schin
2264887Schin /*
2274887Schin * return pointer to ctype function for token
2284887Schin */
2294887Schin
2304887Schin regclass_t
classfun(int type)2314887Schin classfun(int type)
2324887Schin {
2334887Schin switch (type)
2344887Schin {
2354887Schin case T_ALNUM: return Isword;
2364887Schin case T_ALNUM_NOT: return Notword;
2374887Schin case T_DIGIT: return Isdigit;
2384887Schin case T_DIGIT_NOT: return Notdigit;
2394887Schin case T_SPACE: return Isspace;
2404887Schin case T_SPACE_NOT: return Notspace;
2414887Schin }
2424887Schin return 0;
2434887Schin }
244