14887Schin /*********************************************************************** 24887Schin * * 34887Schin * This software is part of the ast package * 4*8462SApril.Chin@Sun.COM * Copyright (c) 1985-2008 AT&T Intellectual Property * 54887Schin * and is licensed under the * 64887Schin * Common Public License, Version 1.0 * 7*8462SApril.Chin@Sun.COM * by AT&T Intellectual Property * 84887Schin * * 94887Schin * A copy of the License is available at * 104887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 114887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 124887Schin * * 134887Schin * Information and Software Systems Research * 144887Schin * AT&T Research * 154887Schin * Florham Park NJ * 164887Schin * * 174887Schin * Glenn Fowler <gsf@research.att.com> * 184887Schin * David Korn <dgk@research.att.com> * 194887Schin * Phong Vo <kpv@research.att.com> * 204887Schin * * 214887Schin ***********************************************************************/ 224887Schin #pragma prototyped 234887Schin /* 244887Schin * RE character class support 254887Schin */ 264887Schin 274887Schin #include "reglib.h" 284887Schin 294887Schin struct Ctype_s; typedef struct Ctype_s Ctype_t; 304887Schin 314887Schin struct Ctype_s 324887Schin { 334887Schin const char* name; 344887Schin size_t size; 354887Schin regclass_t ctype; 364887Schin Ctype_t* next; 374887Schin #if _lib_wctype 384887Schin wctype_t wtype; 394887Schin #endif 404887Schin }; 414887Schin 424887Schin static Ctype_t* ctypes; 434887Schin 444887Schin #define CTYPES 12 454887Schin #if _lib_wctype 464887Schin #define WTYPES 8 474887Schin #else 484887Schin #define WTYPES 0 494887Schin #endif 504887Schin 514887Schin /* 524887Schin * this stuff gets around posix failure to define isblank, 534887Schin * and the fact that ctype functions are macros 544887Schin * and any local extensions that may not even have functions or macros 554887Schin */ 564887Schin 574887Schin #if _need_iswblank 584887Schin 594887Schin int 604887Schin _reg_iswblank(wint_t wc) 614887Schin { 624887Schin static int initialized; 634887Schin static wctype_t wt; 644887Schin 654887Schin if (!initialized) 664887Schin { 674887Schin initialized = 1; 684887Schin wt = wctype("blank"); 694887Schin } 704887Schin return iswctype(wc, wt); 714887Schin } 724887Schin 734887Schin #endif 744887Schin 754887Schin static int Isalnum(int c) { return iswalnum(c); } 764887Schin static int Isalpha(int c) { return iswalpha(c); } 774887Schin static int Isblank(int c) { return iswblank(c); } 784887Schin static int Iscntrl(int c) { return iswcntrl(c); } 794887Schin static int Isdigit(int c) { return iswdigit(c); } 804887Schin static int Notdigit(int c) { return !iswdigit(c); } 814887Schin static int Isgraph(int c) { return iswgraph(c); } 824887Schin static int Islower(int c) { return iswlower(c); } 834887Schin static int Isprint(int c) { return iswprint(c); } 844887Schin static int Ispunct(int c) { return iswpunct(c); } 854887Schin static int Isspace(int c) { return iswspace(c); } 864887Schin static int Notspace(int c) { return !iswspace(c); } 874887Schin static int Isupper(int c) { return iswupper(c); } 884887Schin static int Isword(int c) { return iswalnum(c) || c == '_'; } 894887Schin static int Notword(int c) { return !iswalnum(c) && c != '_'; } 904887Schin static int Isxdigit(int c) { return iswxdigit(c);} 914887Schin 924887Schin #if _lib_wctype 934887Schin 944887Schin static int Is_wc_1(int); 954887Schin static int Is_wc_2(int); 964887Schin static int Is_wc_3(int); 974887Schin static int Is_wc_4(int); 984887Schin static int Is_wc_5(int); 994887Schin static int Is_wc_6(int); 1004887Schin static int Is_wc_7(int); 1014887Schin static int Is_wc_8(int); 1024887Schin 1034887Schin #endif 1044887Schin 1054887Schin #define SZ(s) s,(sizeof(s)-1) 1064887Schin 1074887Schin static Ctype_t ctype[] = 1084887Schin { 1094887Schin { SZ("alnum"), Isalnum }, 1104887Schin { SZ("alpha"), Isalpha }, 1114887Schin { SZ("blank"), Isblank }, 1124887Schin { SZ("cntrl"), Iscntrl }, 1134887Schin { SZ("digit"), Isdigit }, 1144887Schin { SZ("graph"), Isgraph }, 1154887Schin { SZ("lower"), Islower }, 1164887Schin { SZ("print"), Isprint }, 1174887Schin { SZ("punct"), Ispunct }, 1184887Schin { SZ("space"), Isspace }, 1194887Schin { SZ("upper"), Isupper }, 1204887Schin { SZ("word"), Isword }, 1214887Schin { SZ("xdigit"),Isxdigit}, 1224887Schin #if _lib_wctype 1234887Schin { 0, 0, Is_wc_1 }, 1244887Schin { 0, 0, Is_wc_2 }, 1254887Schin { 0, 0, Is_wc_3 }, 1264887Schin { 0, 0, Is_wc_4 }, 1274887Schin { 0, 0, Is_wc_5 }, 1284887Schin { 0, 0, Is_wc_6 }, 1294887Schin { 0, 0, Is_wc_7 }, 1304887Schin { 0, 0, Is_wc_8 }, 1314887Schin #endif 1324887Schin }; 1334887Schin 1344887Schin #if _lib_wctype 1354887Schin 1364887Schin static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); } 1374887Schin static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); } 1384887Schin static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); } 1394887Schin static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); } 1404887Schin static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); } 1414887Schin static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); } 1424887Schin static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); } 1434887Schin static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); } 1444887Schin 1454887Schin #endif 1464887Schin 1474887Schin /* 1484887Schin * return pointer to ctype function for :class:] in s 1494887Schin * s points to the first char after the initial [ 1504887Schin * if e!=0 it points to next char in s 1514887Schin * 0 returned on error 1524887Schin */ 1534887Schin 1544887Schin regclass_t 1554887Schin regclass(const char* s, char** e) 1564887Schin { 1574887Schin register Ctype_t* cp; 1584887Schin register int c; 1594887Schin register size_t n; 1604887Schin register const char* t; 1614887Schin 1624887Schin if (c = *s++) 1634887Schin { 1644887Schin for (t = s; *t && (*t != c || *(t + 1) != ']'); t++); 1654887Schin if (*t != c) 1664887Schin return 0; 1674887Schin n = t - s; 1684887Schin for (cp = ctypes; cp; cp = cp->next) 1694887Schin if (n == cp->size && strneq(s, cp->name, n)) 1704887Schin goto found; 1714887Schin for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++) 1724887Schin { 1734887Schin #if _lib_wctype 1744887Schin if (!cp->size && (cp->name = (const char*)memdup(s, n + 1))) 1754887Schin { 1764887Schin *((char*)cp->name + n) = 0; 1774887Schin /* mvs.390 needs the (char*) cast -- barf */ 1784887Schin if (!(cp->wtype = wctype((char*)cp->name))) 1794887Schin { 1804887Schin free((char*)cp->name); 1814887Schin return 0; 1824887Schin } 1834887Schin cp->size = n; 1844887Schin goto found; 1854887Schin } 1864887Schin #endif 1874887Schin if (n == cp->size && strneq(s, cp->name, n)) 1884887Schin goto found; 1894887Schin } 1904887Schin } 1914887Schin return 0; 1924887Schin found: 1934887Schin if (e) 1944887Schin *e = (char*)t + 2; 1954887Schin return cp->ctype; 1964887Schin } 1974887Schin 1984887Schin /* 1994887Schin * associate the ctype function fun with name 2004887Schin */ 2014887Schin 2024887Schin int 2034887Schin regaddclass(const char* name, regclass_t fun) 2044887Schin { 2054887Schin register Ctype_t* cp; 2064887Schin register Ctype_t* np; 2074887Schin register size_t n; 2084887Schin 2094887Schin n = strlen(name); 2104887Schin for (cp = ctypes; cp; cp = cp->next) 2114887Schin if (cp->size == n && strneq(name, cp->name, n)) 2124887Schin { 2134887Schin cp->ctype = fun; 2144887Schin return 0; 2154887Schin } 2164887Schin if (!(np = newof(0, Ctype_t, 1, n + 1))) 2174887Schin return REG_ESPACE; 2184887Schin np->size = n; 2194887Schin np->name = strcpy((char*)(np + 1), name); 2204887Schin np->ctype = fun; 2214887Schin np->next = ctypes; 2224887Schin ctypes = np; 2234887Schin return 0; 2244887Schin } 2254887Schin 2264887Schin /* 2274887Schin * return pointer to ctype function for token 2284887Schin */ 2294887Schin 2304887Schin regclass_t 2314887Schin classfun(int type) 2324887Schin { 2334887Schin switch (type) 2344887Schin { 2354887Schin case T_ALNUM: return Isword; 2364887Schin case T_ALNUM_NOT: return Notword; 2374887Schin case T_DIGIT: return Isdigit; 2384887Schin case T_DIGIT_NOT: return Notdigit; 2394887Schin case T_SPACE: return Isspace; 2404887Schin case T_SPACE_NOT: return Notspace; 2414887Schin } 2424887Schin return 0; 2434887Schin } 244