1*4887Schin /*********************************************************************** 2*4887Schin * * 3*4887Schin * This software is part of the ast package * 4*4887Schin * Copyright (c) 1985-2007 AT&T Knowledge Ventures * 5*4887Schin * and is licensed under the * 6*4887Schin * Common Public License, Version 1.0 * 7*4887Schin * by AT&T Knowledge Ventures * 8*4887Schin * * 9*4887Schin * A copy of the License is available at * 10*4887Schin * http://www.opensource.org/licenses/cpl1.0.txt * 11*4887Schin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12*4887Schin * * 13*4887Schin * Information and Software Systems Research * 14*4887Schin * AT&T Research * 15*4887Schin * Florham Park NJ * 16*4887Schin * * 17*4887Schin * Glenn Fowler <gsf@research.att.com> * 18*4887Schin * David Korn <dgk@research.att.com> * 19*4887Schin * Phong Vo <kpv@research.att.com> * 20*4887Schin * * 21*4887Schin ***********************************************************************/ 22*4887Schin #pragma prototyped 23*4887Schin /* 24*4887Schin * RE character class support 25*4887Schin */ 26*4887Schin 27*4887Schin #include "reglib.h" 28*4887Schin 29*4887Schin struct Ctype_s; typedef struct Ctype_s Ctype_t; 30*4887Schin 31*4887Schin struct Ctype_s 32*4887Schin { 33*4887Schin const char* name; 34*4887Schin size_t size; 35*4887Schin regclass_t ctype; 36*4887Schin Ctype_t* next; 37*4887Schin #if _lib_wctype 38*4887Schin wctype_t wtype; 39*4887Schin #endif 40*4887Schin }; 41*4887Schin 42*4887Schin static Ctype_t* ctypes; 43*4887Schin 44*4887Schin #define CTYPES 12 45*4887Schin #if _lib_wctype 46*4887Schin #define WTYPES 8 47*4887Schin #else 48*4887Schin #define WTYPES 0 49*4887Schin #endif 50*4887Schin 51*4887Schin /* 52*4887Schin * this stuff gets around posix failure to define isblank, 53*4887Schin * and the fact that ctype functions are macros 54*4887Schin * and any local extensions that may not even have functions or macros 55*4887Schin */ 56*4887Schin 57*4887Schin #if _need_iswblank 58*4887Schin 59*4887Schin int 60*4887Schin _reg_iswblank(wint_t wc) 61*4887Schin { 62*4887Schin static int initialized; 63*4887Schin static wctype_t wt; 64*4887Schin 65*4887Schin if (!initialized) 66*4887Schin { 67*4887Schin initialized = 1; 68*4887Schin wt = wctype("blank"); 69*4887Schin } 70*4887Schin return iswctype(wc, wt); 71*4887Schin } 72*4887Schin 73*4887Schin #endif 74*4887Schin 75*4887Schin static int Isalnum(int c) { return iswalnum(c); } 76*4887Schin static int Isalpha(int c) { return iswalpha(c); } 77*4887Schin static int Isblank(int c) { return iswblank(c); } 78*4887Schin static int Iscntrl(int c) { return iswcntrl(c); } 79*4887Schin static int Isdigit(int c) { return iswdigit(c); } 80*4887Schin static int Notdigit(int c) { return !iswdigit(c); } 81*4887Schin static int Isgraph(int c) { return iswgraph(c); } 82*4887Schin static int Islower(int c) { return iswlower(c); } 83*4887Schin static int Isprint(int c) { return iswprint(c); } 84*4887Schin static int Ispunct(int c) { return iswpunct(c); } 85*4887Schin static int Isspace(int c) { return iswspace(c); } 86*4887Schin static int Notspace(int c) { return !iswspace(c); } 87*4887Schin static int Isupper(int c) { return iswupper(c); } 88*4887Schin static int Isword(int c) { return iswalnum(c) || c == '_'; } 89*4887Schin static int Notword(int c) { return !iswalnum(c) && c != '_'; } 90*4887Schin static int Isxdigit(int c) { return iswxdigit(c);} 91*4887Schin 92*4887Schin #if _lib_wctype 93*4887Schin 94*4887Schin static int Is_wc_1(int); 95*4887Schin static int Is_wc_2(int); 96*4887Schin static int Is_wc_3(int); 97*4887Schin static int Is_wc_4(int); 98*4887Schin static int Is_wc_5(int); 99*4887Schin static int Is_wc_6(int); 100*4887Schin static int Is_wc_7(int); 101*4887Schin static int Is_wc_8(int); 102*4887Schin 103*4887Schin #endif 104*4887Schin 105*4887Schin #define SZ(s) s,(sizeof(s)-1) 106*4887Schin 107*4887Schin static Ctype_t ctype[] = 108*4887Schin { 109*4887Schin { SZ("alnum"), Isalnum }, 110*4887Schin { SZ("alpha"), Isalpha }, 111*4887Schin { SZ("blank"), Isblank }, 112*4887Schin { SZ("cntrl"), Iscntrl }, 113*4887Schin { SZ("digit"), Isdigit }, 114*4887Schin { SZ("graph"), Isgraph }, 115*4887Schin { SZ("lower"), Islower }, 116*4887Schin { SZ("print"), Isprint }, 117*4887Schin { SZ("punct"), Ispunct }, 118*4887Schin { SZ("space"), Isspace }, 119*4887Schin { SZ("upper"), Isupper }, 120*4887Schin { SZ("word"), Isword }, 121*4887Schin { SZ("xdigit"),Isxdigit}, 122*4887Schin #if _lib_wctype 123*4887Schin { 0, 0, Is_wc_1 }, 124*4887Schin { 0, 0, Is_wc_2 }, 125*4887Schin { 0, 0, Is_wc_3 }, 126*4887Schin { 0, 0, Is_wc_4 }, 127*4887Schin { 0, 0, Is_wc_5 }, 128*4887Schin { 0, 0, Is_wc_6 }, 129*4887Schin { 0, 0, Is_wc_7 }, 130*4887Schin { 0, 0, Is_wc_8 }, 131*4887Schin #endif 132*4887Schin }; 133*4887Schin 134*4887Schin #if _lib_wctype 135*4887Schin 136*4887Schin static int Is_wc_1(int c) { return iswctype(c, ctype[CTYPES+0].wtype); } 137*4887Schin static int Is_wc_2(int c) { return iswctype(c, ctype[CTYPES+1].wtype); } 138*4887Schin static int Is_wc_3(int c) { return iswctype(c, ctype[CTYPES+2].wtype); } 139*4887Schin static int Is_wc_4(int c) { return iswctype(c, ctype[CTYPES+3].wtype); } 140*4887Schin static int Is_wc_5(int c) { return iswctype(c, ctype[CTYPES+4].wtype); } 141*4887Schin static int Is_wc_6(int c) { return iswctype(c, ctype[CTYPES+5].wtype); } 142*4887Schin static int Is_wc_7(int c) { return iswctype(c, ctype[CTYPES+6].wtype); } 143*4887Schin static int Is_wc_8(int c) { return iswctype(c, ctype[CTYPES+7].wtype); } 144*4887Schin 145*4887Schin #endif 146*4887Schin 147*4887Schin /* 148*4887Schin * return pointer to ctype function for :class:] in s 149*4887Schin * s points to the first char after the initial [ 150*4887Schin * if e!=0 it points to next char in s 151*4887Schin * 0 returned on error 152*4887Schin */ 153*4887Schin 154*4887Schin regclass_t 155*4887Schin regclass(const char* s, char** e) 156*4887Schin { 157*4887Schin register Ctype_t* cp; 158*4887Schin register int c; 159*4887Schin register size_t n; 160*4887Schin register const char* t; 161*4887Schin 162*4887Schin if (c = *s++) 163*4887Schin { 164*4887Schin for (t = s; *t && (*t != c || *(t + 1) != ']'); t++); 165*4887Schin if (*t != c) 166*4887Schin return 0; 167*4887Schin n = t - s; 168*4887Schin for (cp = ctypes; cp; cp = cp->next) 169*4887Schin if (n == cp->size && strneq(s, cp->name, n)) 170*4887Schin goto found; 171*4887Schin for (cp = ctype; cp < &ctype[elementsof(ctype)]; cp++) 172*4887Schin { 173*4887Schin #if _lib_wctype 174*4887Schin if (!cp->size && (cp->name = (const char*)memdup(s, n + 1))) 175*4887Schin { 176*4887Schin *((char*)cp->name + n) = 0; 177*4887Schin /* mvs.390 needs the (char*) cast -- barf */ 178*4887Schin if (!(cp->wtype = wctype((char*)cp->name))) 179*4887Schin { 180*4887Schin free((char*)cp->name); 181*4887Schin return 0; 182*4887Schin } 183*4887Schin cp->size = n; 184*4887Schin goto found; 185*4887Schin } 186*4887Schin #endif 187*4887Schin if (n == cp->size && strneq(s, cp->name, n)) 188*4887Schin goto found; 189*4887Schin } 190*4887Schin } 191*4887Schin return 0; 192*4887Schin found: 193*4887Schin if (e) 194*4887Schin *e = (char*)t + 2; 195*4887Schin return cp->ctype; 196*4887Schin } 197*4887Schin 198*4887Schin /* 199*4887Schin * associate the ctype function fun with name 200*4887Schin */ 201*4887Schin 202*4887Schin int 203*4887Schin regaddclass(const char* name, regclass_t fun) 204*4887Schin { 205*4887Schin register Ctype_t* cp; 206*4887Schin register Ctype_t* np; 207*4887Schin register size_t n; 208*4887Schin 209*4887Schin n = strlen(name); 210*4887Schin for (cp = ctypes; cp; cp = cp->next) 211*4887Schin if (cp->size == n && strneq(name, cp->name, n)) 212*4887Schin { 213*4887Schin cp->ctype = fun; 214*4887Schin return 0; 215*4887Schin } 216*4887Schin if (!(np = newof(0, Ctype_t, 1, n + 1))) 217*4887Schin return REG_ESPACE; 218*4887Schin np->size = n; 219*4887Schin np->name = strcpy((char*)(np + 1), name); 220*4887Schin np->ctype = fun; 221*4887Schin np->next = ctypes; 222*4887Schin ctypes = np; 223*4887Schin return 0; 224*4887Schin } 225*4887Schin 226*4887Schin /* 227*4887Schin * return pointer to ctype function for token 228*4887Schin */ 229*4887Schin 230*4887Schin regclass_t 231*4887Schin classfun(int type) 232*4887Schin { 233*4887Schin switch (type) 234*4887Schin { 235*4887Schin case T_ALNUM: return Isword; 236*4887Schin case T_ALNUM_NOT: return Notword; 237*4887Schin case T_DIGIT: return Isdigit; 238*4887Schin case T_DIGIT_NOT: return Notdigit; 239*4887Schin case T_SPACE: return Isspace; 240*4887Schin case T_SPACE_NOT: return Notspace; 241*4887Schin } 242*4887Schin return 0; 243*4887Schin } 244