13e12c5d1SDavid du Colombier #include <u.h> 23e12c5d1SDavid du Colombier #include <libc.h> 33e12c5d1SDavid du Colombier #include "regexp.h" 43e12c5d1SDavid du Colombier #include "regcomp.h" 53e12c5d1SDavid du Colombier 63e12c5d1SDavid du Colombier #define TRUE 1 73e12c5d1SDavid du Colombier #define FALSE 0 83e12c5d1SDavid du Colombier 93e12c5d1SDavid du Colombier /* 103e12c5d1SDavid du Colombier * Parser Information 113e12c5d1SDavid du Colombier */ 123e12c5d1SDavid du Colombier typedef 133e12c5d1SDavid du Colombier struct Node 143e12c5d1SDavid du Colombier { 153e12c5d1SDavid du Colombier Reinst* first; 163e12c5d1SDavid du Colombier Reinst* last; 173e12c5d1SDavid du Colombier }Node; 183e12c5d1SDavid du Colombier 19*3468a491SDavid du Colombier /* max character classes per program is nelem(reprog->class) */ 20*3468a491SDavid du Colombier static Reprog *reprog; 21*3468a491SDavid du Colombier 22*3468a491SDavid du Colombier /* max rune ranges per character class is nelem(classp->spans)/2 */ 23*3468a491SDavid du Colombier #define NCCRUNE nelem(classp->spans) 24*3468a491SDavid du Colombier 253e12c5d1SDavid du Colombier #define NSTACK 20 263e12c5d1SDavid du Colombier static Node andstack[NSTACK]; 273e12c5d1SDavid du Colombier static Node *andp; 283e12c5d1SDavid du Colombier static int atorstack[NSTACK]; 293e12c5d1SDavid du Colombier static int* atorp; 303e12c5d1SDavid du Colombier static int cursubid; /* id of current subexpression */ 313e12c5d1SDavid du Colombier static int subidstack[NSTACK]; /* parallel to atorstack */ 323e12c5d1SDavid du Colombier static int* subidp; 333e12c5d1SDavid du Colombier static int lastwasand; /* Last token was operand */ 343e12c5d1SDavid du Colombier static int nbra; 353e12c5d1SDavid du Colombier static char* exprp; /* pointer to next character in source expression */ 363e12c5d1SDavid du Colombier static int lexdone; 373e12c5d1SDavid du Colombier static int nclass; 383e12c5d1SDavid du Colombier static Reclass*classp; 393e12c5d1SDavid du Colombier static Reinst* freep; 403e12c5d1SDavid du Colombier static int errors; 413e12c5d1SDavid du Colombier static Rune yyrune; /* last lex'd rune */ 423e12c5d1SDavid du Colombier static Reclass*yyclassp; /* last lex'd class */ 433e12c5d1SDavid du Colombier 443e12c5d1SDavid du Colombier /* predeclared crap */ 453e12c5d1SDavid du Colombier static void operator(int); 463e12c5d1SDavid du Colombier static void pushand(Reinst*, Reinst*); 473e12c5d1SDavid du Colombier static void pushator(int); 483e12c5d1SDavid du Colombier static void evaluntil(int); 493e12c5d1SDavid du Colombier static int bldcclass(void); 503e12c5d1SDavid du Colombier 513e12c5d1SDavid du Colombier static jmp_buf regkaboom; 523e12c5d1SDavid du Colombier 533e12c5d1SDavid du Colombier static void 543e12c5d1SDavid du Colombier rcerror(char *s) 553e12c5d1SDavid du Colombier { 563e12c5d1SDavid du Colombier errors++; 573e12c5d1SDavid du Colombier regerror(s); 583e12c5d1SDavid du Colombier longjmp(regkaboom, 1); 593e12c5d1SDavid du Colombier } 603e12c5d1SDavid du Colombier 613e12c5d1SDavid du Colombier static Reinst* 623e12c5d1SDavid du Colombier newinst(int t) 633e12c5d1SDavid du Colombier { 643e12c5d1SDavid du Colombier freep->type = t; 653e12c5d1SDavid du Colombier freep->left = 0; 663e12c5d1SDavid du Colombier freep->right = 0; 673e12c5d1SDavid du Colombier return freep++; 683e12c5d1SDavid du Colombier } 693e12c5d1SDavid du Colombier 703e12c5d1SDavid du Colombier static void 713e12c5d1SDavid du Colombier operand(int t) 723e12c5d1SDavid du Colombier { 733e12c5d1SDavid du Colombier Reinst *i; 743e12c5d1SDavid du Colombier 753e12c5d1SDavid du Colombier if(lastwasand) 763e12c5d1SDavid du Colombier operator(CAT); /* catenate is implicit */ 773e12c5d1SDavid du Colombier i = newinst(t); 783e12c5d1SDavid du Colombier 793e12c5d1SDavid du Colombier if(t == CCLASS || t == NCCLASS) 803e12c5d1SDavid du Colombier i->cp = yyclassp; 813e12c5d1SDavid du Colombier if(t == RUNE) 823e12c5d1SDavid du Colombier i->r = yyrune; 833e12c5d1SDavid du Colombier 843e12c5d1SDavid du Colombier pushand(i, i); 853e12c5d1SDavid du Colombier lastwasand = TRUE; 863e12c5d1SDavid du Colombier } 873e12c5d1SDavid du Colombier 883e12c5d1SDavid du Colombier static void 893e12c5d1SDavid du Colombier operator(int t) 903e12c5d1SDavid du Colombier { 913e12c5d1SDavid du Colombier if(t==RBRA && --nbra<0) 923e12c5d1SDavid du Colombier rcerror("unmatched right paren"); 933e12c5d1SDavid du Colombier if(t==LBRA){ 943e12c5d1SDavid du Colombier if(++cursubid >= NSUBEXP) 953e12c5d1SDavid du Colombier rcerror ("too many subexpressions"); 963e12c5d1SDavid du Colombier nbra++; 973e12c5d1SDavid du Colombier if(lastwasand) 983e12c5d1SDavid du Colombier operator(CAT); 993e12c5d1SDavid du Colombier } else 1003e12c5d1SDavid du Colombier evaluntil(t); 1013e12c5d1SDavid du Colombier if(t != RBRA) 1023e12c5d1SDavid du Colombier pushator(t); 1033e12c5d1SDavid du Colombier lastwasand = FALSE; 1043e12c5d1SDavid du Colombier if(t==STAR || t==QUEST || t==PLUS || t==RBRA) 1053e12c5d1SDavid du Colombier lastwasand = TRUE; /* these look like operands */ 1063e12c5d1SDavid du Colombier } 1073e12c5d1SDavid du Colombier 1083e12c5d1SDavid du Colombier static void 1093e12c5d1SDavid du Colombier regerr2(char *s, int c) 1103e12c5d1SDavid du Colombier { 1113e12c5d1SDavid du Colombier char buf[100]; 1123e12c5d1SDavid du Colombier char *cp = buf; 1133e12c5d1SDavid du Colombier while(*s) 1143e12c5d1SDavid du Colombier *cp++ = *s++; 1153e12c5d1SDavid du Colombier *cp++ = c; 1163e12c5d1SDavid du Colombier *cp = '\0'; 1173e12c5d1SDavid du Colombier rcerror(buf); 1183e12c5d1SDavid du Colombier } 1193e12c5d1SDavid du Colombier 1203e12c5d1SDavid du Colombier static void 1213e12c5d1SDavid du Colombier cant(char *s) 1223e12c5d1SDavid du Colombier { 1233e12c5d1SDavid du Colombier char buf[100]; 1243e12c5d1SDavid du Colombier strcpy(buf, "can't happen: "); 1253e12c5d1SDavid du Colombier strcat(buf, s); 1263e12c5d1SDavid du Colombier rcerror(buf); 1273e12c5d1SDavid du Colombier } 1283e12c5d1SDavid du Colombier 1293e12c5d1SDavid du Colombier static void 1303e12c5d1SDavid du Colombier pushand(Reinst *f, Reinst *l) 1313e12c5d1SDavid du Colombier { 1323e12c5d1SDavid du Colombier if(andp >= &andstack[NSTACK]) 1333e12c5d1SDavid du Colombier cant("operand stack overflow"); 1343e12c5d1SDavid du Colombier andp->first = f; 1353e12c5d1SDavid du Colombier andp->last = l; 1363e12c5d1SDavid du Colombier andp++; 1373e12c5d1SDavid du Colombier } 1383e12c5d1SDavid du Colombier 1393e12c5d1SDavid du Colombier static void 1403e12c5d1SDavid du Colombier pushator(int t) 1413e12c5d1SDavid du Colombier { 1423e12c5d1SDavid du Colombier if(atorp >= &atorstack[NSTACK]) 1433e12c5d1SDavid du Colombier cant("operator stack overflow"); 1443e12c5d1SDavid du Colombier *atorp++ = t; 1453e12c5d1SDavid du Colombier *subidp++ = cursubid; 1463e12c5d1SDavid du Colombier } 1473e12c5d1SDavid du Colombier 1483e12c5d1SDavid du Colombier static Node* 1493e12c5d1SDavid du Colombier popand(int op) 1503e12c5d1SDavid du Colombier { 1513e12c5d1SDavid du Colombier Reinst *inst; 1523e12c5d1SDavid du Colombier 1533e12c5d1SDavid du Colombier if(andp <= &andstack[0]){ 1543e12c5d1SDavid du Colombier regerr2("missing operand for ", op); 1553e12c5d1SDavid du Colombier inst = newinst(NOP); 1563e12c5d1SDavid du Colombier pushand(inst,inst); 1573e12c5d1SDavid du Colombier } 1583e12c5d1SDavid du Colombier return --andp; 1593e12c5d1SDavid du Colombier } 1603e12c5d1SDavid du Colombier 1613e12c5d1SDavid du Colombier static int 1623e12c5d1SDavid du Colombier popator(void) 1633e12c5d1SDavid du Colombier { 1643e12c5d1SDavid du Colombier if(atorp <= &atorstack[0]) 1653e12c5d1SDavid du Colombier cant("operator stack underflow"); 1663e12c5d1SDavid du Colombier --subidp; 1673e12c5d1SDavid du Colombier return *--atorp; 1683e12c5d1SDavid du Colombier } 1693e12c5d1SDavid du Colombier 1703e12c5d1SDavid du Colombier static void 1713e12c5d1SDavid du Colombier evaluntil(int pri) 1723e12c5d1SDavid du Colombier { 1733e12c5d1SDavid du Colombier Node *op1, *op2; 1743e12c5d1SDavid du Colombier Reinst *inst1, *inst2; 1753e12c5d1SDavid du Colombier 1763e12c5d1SDavid du Colombier while(pri==RBRA || atorp[-1]>=pri){ 1773e12c5d1SDavid du Colombier switch(popator()){ 1783e12c5d1SDavid du Colombier default: 1793e12c5d1SDavid du Colombier rcerror("unknown operator in evaluntil"); 1803e12c5d1SDavid du Colombier break; 1813e12c5d1SDavid du Colombier case LBRA: /* must have been RBRA */ 1823e12c5d1SDavid du Colombier op1 = popand('('); 1833e12c5d1SDavid du Colombier inst2 = newinst(RBRA); 1843e12c5d1SDavid du Colombier inst2->subid = *subidp; 1853e12c5d1SDavid du Colombier op1->last->next = inst2; 1863e12c5d1SDavid du Colombier inst1 = newinst(LBRA); 1873e12c5d1SDavid du Colombier inst1->subid = *subidp; 1883e12c5d1SDavid du Colombier inst1->next = op1->first; 1893e12c5d1SDavid du Colombier pushand(inst1, inst2); 1903e12c5d1SDavid du Colombier return; 1913e12c5d1SDavid du Colombier case OR: 1923e12c5d1SDavid du Colombier op2 = popand('|'); 1933e12c5d1SDavid du Colombier op1 = popand('|'); 1943e12c5d1SDavid du Colombier inst2 = newinst(NOP); 1953e12c5d1SDavid du Colombier op2->last->next = inst2; 1963e12c5d1SDavid du Colombier op1->last->next = inst2; 1973e12c5d1SDavid du Colombier inst1 = newinst(OR); 1983e12c5d1SDavid du Colombier inst1->right = op1->first; 1993e12c5d1SDavid du Colombier inst1->left = op2->first; 2003e12c5d1SDavid du Colombier pushand(inst1, inst2); 2013e12c5d1SDavid du Colombier break; 2023e12c5d1SDavid du Colombier case CAT: 2033e12c5d1SDavid du Colombier op2 = popand(0); 2043e12c5d1SDavid du Colombier op1 = popand(0); 2053e12c5d1SDavid du Colombier op1->last->next = op2->first; 2063e12c5d1SDavid du Colombier pushand(op1->first, op2->last); 2073e12c5d1SDavid du Colombier break; 2083e12c5d1SDavid du Colombier case STAR: 2093e12c5d1SDavid du Colombier op2 = popand('*'); 2103e12c5d1SDavid du Colombier inst1 = newinst(OR); 2113e12c5d1SDavid du Colombier op2->last->next = inst1; 2123e12c5d1SDavid du Colombier inst1->right = op2->first; 2133e12c5d1SDavid du Colombier pushand(inst1, inst1); 2143e12c5d1SDavid du Colombier break; 2153e12c5d1SDavid du Colombier case PLUS: 2163e12c5d1SDavid du Colombier op2 = popand('+'); 2173e12c5d1SDavid du Colombier inst1 = newinst(OR); 2183e12c5d1SDavid du Colombier op2->last->next = inst1; 2193e12c5d1SDavid du Colombier inst1->right = op2->first; 2203e12c5d1SDavid du Colombier pushand(op2->first, inst1); 2213e12c5d1SDavid du Colombier break; 2223e12c5d1SDavid du Colombier case QUEST: 2233e12c5d1SDavid du Colombier op2 = popand('?'); 2243e12c5d1SDavid du Colombier inst1 = newinst(OR); 2253e12c5d1SDavid du Colombier inst2 = newinst(NOP); 2263e12c5d1SDavid du Colombier inst1->left = inst2; 2273e12c5d1SDavid du Colombier inst1->right = op2->first; 2283e12c5d1SDavid du Colombier op2->last->next = inst2; 2293e12c5d1SDavid du Colombier pushand(inst1, inst2); 2303e12c5d1SDavid du Colombier break; 2313e12c5d1SDavid du Colombier } 2323e12c5d1SDavid du Colombier } 2333e12c5d1SDavid du Colombier } 2343e12c5d1SDavid du Colombier 2353e12c5d1SDavid du Colombier static Reprog* 2363e12c5d1SDavid du Colombier optimize(Reprog *pp) 2373e12c5d1SDavid du Colombier { 2383e12c5d1SDavid du Colombier Reinst *inst, *target; 2393e12c5d1SDavid du Colombier int size; 2403e12c5d1SDavid du Colombier Reprog *npp; 241219b2ee8SDavid du Colombier Reclass *cl; 2423e12c5d1SDavid du Colombier int diff; 2433e12c5d1SDavid du Colombier 2443e12c5d1SDavid du Colombier /* 2453e12c5d1SDavid du Colombier * get rid of NOOP chains 2463e12c5d1SDavid du Colombier */ 2473e12c5d1SDavid du Colombier for(inst=pp->firstinst; inst->type!=END; inst++){ 2483e12c5d1SDavid du Colombier target = inst->next; 2493e12c5d1SDavid du Colombier while(target->type == NOP) 2503e12c5d1SDavid du Colombier target = target->next; 2513e12c5d1SDavid du Colombier inst->next = target; 2523e12c5d1SDavid du Colombier } 2533e12c5d1SDavid du Colombier 2543e12c5d1SDavid du Colombier /* 2553e12c5d1SDavid du Colombier * The original allocation is for an area larger than 2563e12c5d1SDavid du Colombier * necessary. Reallocate to the actual space used 2573e12c5d1SDavid du Colombier * and then relocate the code. 2583e12c5d1SDavid du Colombier */ 2593e12c5d1SDavid du Colombier size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); 2603e12c5d1SDavid du Colombier npp = realloc(pp, size); 2613e12c5d1SDavid du Colombier if(npp==0 || npp==pp) 2623e12c5d1SDavid du Colombier return pp; 2633e12c5d1SDavid du Colombier diff = (char *)npp - (char *)pp; 2643e12c5d1SDavid du Colombier freep = (Reinst *)((char *)freep + diff); 2653e12c5d1SDavid du Colombier for(inst=npp->firstinst; inst<freep; inst++){ 2663e12c5d1SDavid du Colombier switch(inst->type){ 2673e12c5d1SDavid du Colombier case OR: 2683e12c5d1SDavid du Colombier case STAR: 2693e12c5d1SDavid du Colombier case PLUS: 2703e12c5d1SDavid du Colombier case QUEST: 271219b2ee8SDavid du Colombier *(char **)&inst->right += diff; 272219b2ee8SDavid du Colombier break; 2733e12c5d1SDavid du Colombier case CCLASS: 2743e12c5d1SDavid du Colombier case NCCLASS: 2753e12c5d1SDavid du Colombier *(char **)&inst->right += diff; 276219b2ee8SDavid du Colombier cl = inst->cp; 277219b2ee8SDavid du Colombier *(char **)&cl->end += diff; 2783e12c5d1SDavid du Colombier break; 2793e12c5d1SDavid du Colombier } 2803e12c5d1SDavid du Colombier *(char **)&inst->left += diff; 2813e12c5d1SDavid du Colombier } 2823e12c5d1SDavid du Colombier *(char **)&npp->startinst += diff; 2833e12c5d1SDavid du Colombier return npp; 2843e12c5d1SDavid du Colombier } 2853e12c5d1SDavid du Colombier 2863e12c5d1SDavid du Colombier #ifdef DEBUG 2873e12c5d1SDavid du Colombier static void 2883e12c5d1SDavid du Colombier dumpstack(void){ 2893e12c5d1SDavid du Colombier Node *stk; 2903e12c5d1SDavid du Colombier int *ip; 2913e12c5d1SDavid du Colombier 2923e12c5d1SDavid du Colombier print("operators\n"); 2933e12c5d1SDavid du Colombier for(ip=atorstack; ip<atorp; ip++) 2943e12c5d1SDavid du Colombier print("0%o\n", *ip); 2953e12c5d1SDavid du Colombier print("operands\n"); 2963e12c5d1SDavid du Colombier for(stk=andstack; stk<andp; stk++) 2973e12c5d1SDavid du Colombier print("0%o\t0%o\n", stk->first->type, stk->last->type); 2983e12c5d1SDavid du Colombier } 2993e12c5d1SDavid du Colombier 3003e12c5d1SDavid du Colombier static void 3013e12c5d1SDavid du Colombier dump(Reprog *pp) 3023e12c5d1SDavid du Colombier { 3033e12c5d1SDavid du Colombier Reinst *l; 3043e12c5d1SDavid du Colombier Rune *p; 3053e12c5d1SDavid du Colombier 3063e12c5d1SDavid du Colombier l = pp->firstinst; 3073e12c5d1SDavid du Colombier do{ 3083e12c5d1SDavid du Colombier print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, 3093e12c5d1SDavid du Colombier l->left-pp->firstinst, l->right-pp->firstinst); 3103e12c5d1SDavid du Colombier if(l->type == RUNE) 3113e12c5d1SDavid du Colombier print("\t%C\n", l->r); 3123e12c5d1SDavid du Colombier else if(l->type == CCLASS || l->type == NCCLASS){ 3133e12c5d1SDavid du Colombier print("\t["); 3143e12c5d1SDavid du Colombier if(l->type == NCCLASS) 3153e12c5d1SDavid du Colombier print("^"); 3163e12c5d1SDavid du Colombier for(p = l->cp->spans; p < l->cp->end; p += 2) 3173e12c5d1SDavid du Colombier if(p[0] == p[1]) 3183e12c5d1SDavid du Colombier print("%C", p[0]); 3193e12c5d1SDavid du Colombier else 3203e12c5d1SDavid du Colombier print("%C-%C", p[0], p[1]); 3213e12c5d1SDavid du Colombier print("]\n"); 3223e12c5d1SDavid du Colombier } else 3233e12c5d1SDavid du Colombier print("\n"); 3243e12c5d1SDavid du Colombier }while(l++->type); 3253e12c5d1SDavid du Colombier } 3263e12c5d1SDavid du Colombier #endif 3273e12c5d1SDavid du Colombier 3283e12c5d1SDavid du Colombier static Reclass* 3293e12c5d1SDavid du Colombier newclass(void) 3303e12c5d1SDavid du Colombier { 331*3468a491SDavid du Colombier if(nclass >= nelem(reprog->class)) 332*3468a491SDavid du Colombier rcerror("too many character classes; increase Reprog.class size"); 3333e12c5d1SDavid du Colombier return &(classp[nclass++]); 3343e12c5d1SDavid du Colombier } 3353e12c5d1SDavid du Colombier 3363e12c5d1SDavid du Colombier static int 3373e12c5d1SDavid du Colombier nextc(Rune *rp) 3383e12c5d1SDavid du Colombier { 3393e12c5d1SDavid du Colombier if(lexdone){ 3403e12c5d1SDavid du Colombier *rp = 0; 3413e12c5d1SDavid du Colombier return 1; 3423e12c5d1SDavid du Colombier } 3433e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp); 3443e12c5d1SDavid du Colombier if(*rp == L'\\'){ 3453e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp); 3463e12c5d1SDavid du Colombier return 1; 3473e12c5d1SDavid du Colombier } 3483e12c5d1SDavid du Colombier if(*rp == 0) 3493e12c5d1SDavid du Colombier lexdone = 1; 3503e12c5d1SDavid du Colombier return 0; 3513e12c5d1SDavid du Colombier } 3523e12c5d1SDavid du Colombier 3533e12c5d1SDavid du Colombier static int 3543e12c5d1SDavid du Colombier lex(int literal, int dot_type) 3553e12c5d1SDavid du Colombier { 3563e12c5d1SDavid du Colombier int quoted; 3573e12c5d1SDavid du Colombier 3583e12c5d1SDavid du Colombier quoted = nextc(&yyrune); 3593e12c5d1SDavid du Colombier if(literal || quoted){ 3603e12c5d1SDavid du Colombier if(yyrune == 0) 3613e12c5d1SDavid du Colombier return END; 3623e12c5d1SDavid du Colombier return RUNE; 3633e12c5d1SDavid du Colombier } 3643e12c5d1SDavid du Colombier 3653e12c5d1SDavid du Colombier switch(yyrune){ 3663e12c5d1SDavid du Colombier case 0: 3673e12c5d1SDavid du Colombier return END; 3683e12c5d1SDavid du Colombier case L'*': 3693e12c5d1SDavid du Colombier return STAR; 3703e12c5d1SDavid du Colombier case L'?': 3713e12c5d1SDavid du Colombier return QUEST; 3723e12c5d1SDavid du Colombier case L'+': 3733e12c5d1SDavid du Colombier return PLUS; 3743e12c5d1SDavid du Colombier case L'|': 3753e12c5d1SDavid du Colombier return OR; 3763e12c5d1SDavid du Colombier case L'.': 3773e12c5d1SDavid du Colombier return dot_type; 3783e12c5d1SDavid du Colombier case L'(': 3793e12c5d1SDavid du Colombier return LBRA; 3803e12c5d1SDavid du Colombier case L')': 3813e12c5d1SDavid du Colombier return RBRA; 3823e12c5d1SDavid du Colombier case L'^': 3833e12c5d1SDavid du Colombier return BOL; 3843e12c5d1SDavid du Colombier case L'$': 3853e12c5d1SDavid du Colombier return EOL; 3863e12c5d1SDavid du Colombier case L'[': 3873e12c5d1SDavid du Colombier return bldcclass(); 3883e12c5d1SDavid du Colombier } 3893e12c5d1SDavid du Colombier return RUNE; 3903e12c5d1SDavid du Colombier } 3913e12c5d1SDavid du Colombier 3923e12c5d1SDavid du Colombier static int 3933e12c5d1SDavid du Colombier bldcclass(void) 3943e12c5d1SDavid du Colombier { 3953e12c5d1SDavid du Colombier int type; 3963e12c5d1SDavid du Colombier Rune r[NCCRUNE]; 3973e12c5d1SDavid du Colombier Rune *p, *ep, *np; 3983e12c5d1SDavid du Colombier Rune rune; 3993e12c5d1SDavid du Colombier int quoted; 4003e12c5d1SDavid du Colombier 4013e12c5d1SDavid du Colombier /* we have already seen the '[' */ 4023e12c5d1SDavid du Colombier type = CCLASS; 4033e12c5d1SDavid du Colombier yyclassp = newclass(); 4043e12c5d1SDavid du Colombier 4053e12c5d1SDavid du Colombier /* look ahead for negation */ 4063e12c5d1SDavid du Colombier /* SPECIAL CASE!!! negated classes don't match \n */ 4073e12c5d1SDavid du Colombier ep = r; 4083e12c5d1SDavid du Colombier quoted = nextc(&rune); 4093e12c5d1SDavid du Colombier if(!quoted && rune == L'^'){ 4103e12c5d1SDavid du Colombier type = NCCLASS; 4113e12c5d1SDavid du Colombier quoted = nextc(&rune); 4123e12c5d1SDavid du Colombier *ep++ = L'\n'; 4133e12c5d1SDavid du Colombier *ep++ = L'\n'; 4143e12c5d1SDavid du Colombier } 4153e12c5d1SDavid du Colombier 4163e12c5d1SDavid du Colombier /* parse class into a set of spans */ 417*3468a491SDavid du Colombier while(ep < &r[NCCRUNE-1]){ 4183e12c5d1SDavid du Colombier if(rune == 0){ 4193e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4203e12c5d1SDavid du Colombier return 0; 4213e12c5d1SDavid du Colombier } 4223e12c5d1SDavid du Colombier if(!quoted && rune == L']') 4233e12c5d1SDavid du Colombier break; 4243e12c5d1SDavid du Colombier if(!quoted && rune == L'-'){ 4253e12c5d1SDavid du Colombier if(ep == r){ 4263e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4273e12c5d1SDavid du Colombier return 0; 4283e12c5d1SDavid du Colombier } 4293e12c5d1SDavid du Colombier quoted = nextc(&rune); 4303e12c5d1SDavid du Colombier if((!quoted && rune == L']') || rune == 0){ 4313e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4323e12c5d1SDavid du Colombier return 0; 4333e12c5d1SDavid du Colombier } 4343e12c5d1SDavid du Colombier *(ep-1) = rune; 4353e12c5d1SDavid du Colombier } else { 4363e12c5d1SDavid du Colombier *ep++ = rune; 4373e12c5d1SDavid du Colombier *ep++ = rune; 4383e12c5d1SDavid du Colombier } 4393e12c5d1SDavid du Colombier quoted = nextc(&rune); 4403e12c5d1SDavid du Colombier } 441*3468a491SDavid du Colombier if(ep >= &r[NCCRUNE-1]) { 442*3468a491SDavid du Colombier rcerror("char class too large; increase Reclass.spans size"); 443*3468a491SDavid du Colombier return 0; 444*3468a491SDavid du Colombier } 4453e12c5d1SDavid du Colombier 4463e12c5d1SDavid du Colombier /* sort on span start */ 4473e12c5d1SDavid du Colombier for(p = r; p < ep; p += 2){ 4483e12c5d1SDavid du Colombier for(np = p; np < ep; np += 2) 4493e12c5d1SDavid du Colombier if(*np < *p){ 4503e12c5d1SDavid du Colombier rune = np[0]; 4513e12c5d1SDavid du Colombier np[0] = p[0]; 4523e12c5d1SDavid du Colombier p[0] = rune; 4533e12c5d1SDavid du Colombier rune = np[1]; 4543e12c5d1SDavid du Colombier np[1] = p[1]; 4553e12c5d1SDavid du Colombier p[1] = rune; 4563e12c5d1SDavid du Colombier } 4573e12c5d1SDavid du Colombier } 4583e12c5d1SDavid du Colombier 4593e12c5d1SDavid du Colombier /* merge spans */ 4603e12c5d1SDavid du Colombier np = yyclassp->spans; 4613e12c5d1SDavid du Colombier p = r; 4623e12c5d1SDavid du Colombier if(r == ep) 4633e12c5d1SDavid du Colombier yyclassp->end = np; 4643e12c5d1SDavid du Colombier else { 4653e12c5d1SDavid du Colombier np[0] = *p++; 4663e12c5d1SDavid du Colombier np[1] = *p++; 4673e12c5d1SDavid du Colombier for(; p < ep; p += 2) 468*3468a491SDavid du Colombier /* overlapping or adjacent ranges? */ 469*3468a491SDavid du Colombier if(p[0] <= np[1] + 1){ 470*3468a491SDavid du Colombier if(p[1] >= np[1]) 471*3468a491SDavid du Colombier np[1] = p[1]; /* coalesce */ 4723e12c5d1SDavid du Colombier } else { 4733e12c5d1SDavid du Colombier np += 2; 4743e12c5d1SDavid du Colombier np[0] = p[0]; 4753e12c5d1SDavid du Colombier np[1] = p[1]; 4763e12c5d1SDavid du Colombier } 4773e12c5d1SDavid du Colombier yyclassp->end = np+2; 4783e12c5d1SDavid du Colombier } 4793e12c5d1SDavid du Colombier 4803e12c5d1SDavid du Colombier return type; 4813e12c5d1SDavid du Colombier } 4823e12c5d1SDavid du Colombier 4833e12c5d1SDavid du Colombier static Reprog* 4843e12c5d1SDavid du Colombier regcomp1(char *s, int literal, int dot_type) 4853e12c5d1SDavid du Colombier { 4863e12c5d1SDavid du Colombier int token; 4873e12c5d1SDavid du Colombier Reprog *pp; 4883e12c5d1SDavid du Colombier 4893e12c5d1SDavid du Colombier /* get memory for the program */ 4903e12c5d1SDavid du Colombier pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); 4913e12c5d1SDavid du Colombier if(pp == 0){ 4923e12c5d1SDavid du Colombier regerror("out of memory"); 4933e12c5d1SDavid du Colombier return 0; 4943e12c5d1SDavid du Colombier } 4953e12c5d1SDavid du Colombier freep = pp->firstinst; 4963e12c5d1SDavid du Colombier classp = pp->class; 4973e12c5d1SDavid du Colombier errors = 0; 4983e12c5d1SDavid du Colombier 4993e12c5d1SDavid du Colombier if(setjmp(regkaboom)) 5003e12c5d1SDavid du Colombier goto out; 5013e12c5d1SDavid du Colombier 5023e12c5d1SDavid du Colombier /* go compile the sucker */ 5033e12c5d1SDavid du Colombier lexdone = 0; 5043e12c5d1SDavid du Colombier exprp = s; 5053e12c5d1SDavid du Colombier nclass = 0; 5063e12c5d1SDavid du Colombier nbra = 0; 5073e12c5d1SDavid du Colombier atorp = atorstack; 5083e12c5d1SDavid du Colombier andp = andstack; 5093e12c5d1SDavid du Colombier subidp = subidstack; 5103e12c5d1SDavid du Colombier lastwasand = FALSE; 5113e12c5d1SDavid du Colombier cursubid = 0; 5123e12c5d1SDavid du Colombier 5133e12c5d1SDavid du Colombier /* Start with a low priority operator to prime parser */ 5143e12c5d1SDavid du Colombier pushator(START-1); 5153e12c5d1SDavid du Colombier while((token = lex(literal, dot_type)) != END){ 5163e12c5d1SDavid du Colombier if((token&0300) == OPERATOR) 5173e12c5d1SDavid du Colombier operator(token); 5183e12c5d1SDavid du Colombier else 5193e12c5d1SDavid du Colombier operand(token); 5203e12c5d1SDavid du Colombier } 5213e12c5d1SDavid du Colombier 5223e12c5d1SDavid du Colombier /* Close with a low priority operator */ 5233e12c5d1SDavid du Colombier evaluntil(START); 5243e12c5d1SDavid du Colombier 5253e12c5d1SDavid du Colombier /* Force END */ 5263e12c5d1SDavid du Colombier operand(END); 5273e12c5d1SDavid du Colombier evaluntil(START); 5283e12c5d1SDavid du Colombier #ifdef DEBUG 5293e12c5d1SDavid du Colombier dumpstack(); 5303e12c5d1SDavid du Colombier #endif 5313e12c5d1SDavid du Colombier if(nbra) 5323e12c5d1SDavid du Colombier rcerror("unmatched left paren"); 5333e12c5d1SDavid du Colombier --andp; /* points to first and only operand */ 5343e12c5d1SDavid du Colombier pp->startinst = andp->first; 5353e12c5d1SDavid du Colombier #ifdef DEBUG 5363e12c5d1SDavid du Colombier dump(pp); 5373e12c5d1SDavid du Colombier #endif 5383e12c5d1SDavid du Colombier pp = optimize(pp); 5393e12c5d1SDavid du Colombier #ifdef DEBUG 5403e12c5d1SDavid du Colombier print("start: %d\n", andp->first-pp->firstinst); 5413e12c5d1SDavid du Colombier dump(pp); 5423e12c5d1SDavid du Colombier #endif 5433e12c5d1SDavid du Colombier out: 5443e12c5d1SDavid du Colombier if(errors){ 5453e12c5d1SDavid du Colombier free(pp); 5463e12c5d1SDavid du Colombier pp = 0; 5473e12c5d1SDavid du Colombier } 5483e12c5d1SDavid du Colombier return pp; 5493e12c5d1SDavid du Colombier } 5503e12c5d1SDavid du Colombier 5513e12c5d1SDavid du Colombier extern Reprog* 5523e12c5d1SDavid du Colombier regcomp(char *s) 5533e12c5d1SDavid du Colombier { 5543e12c5d1SDavid du Colombier return regcomp1(s, 0, ANY); 5553e12c5d1SDavid du Colombier } 5563e12c5d1SDavid du Colombier 5573e12c5d1SDavid du Colombier extern Reprog* 5583e12c5d1SDavid du Colombier regcomplit(char *s) 5593e12c5d1SDavid du Colombier { 5603e12c5d1SDavid du Colombier return regcomp1(s, 1, ANY); 5613e12c5d1SDavid du Colombier } 5623e12c5d1SDavid du Colombier 5633e12c5d1SDavid du Colombier extern Reprog* 5643e12c5d1SDavid du Colombier regcompnl(char *s) 5653e12c5d1SDavid du Colombier { 5663e12c5d1SDavid du Colombier return regcomp1(s, 0, ANYNL); 5673e12c5d1SDavid du Colombier } 568