13e12c5d1SDavid du Colombier #include <u.h> 23e12c5d1SDavid du Colombier #include <libc.h> 33e12c5d1SDavid du Colombier #include "regexp.h" 43e12c5d1SDavid du Colombier #include "regcomp.h" 53e12c5d1SDavid du Colombier 63e12c5d1SDavid du Colombier #define TRUE 1 73e12c5d1SDavid du Colombier #define FALSE 0 83e12c5d1SDavid du Colombier 93e12c5d1SDavid du Colombier /* 103e12c5d1SDavid du Colombier * Parser Information 113e12c5d1SDavid du Colombier */ 123e12c5d1SDavid du Colombier typedef 133e12c5d1SDavid du Colombier struct Node 143e12c5d1SDavid du Colombier { 153e12c5d1SDavid du Colombier Reinst* first; 163e12c5d1SDavid du Colombier Reinst* last; 173e12c5d1SDavid du Colombier }Node; 183e12c5d1SDavid du Colombier 193e12c5d1SDavid du Colombier #define NSTACK 20 203e12c5d1SDavid du Colombier static Node andstack[NSTACK]; 213e12c5d1SDavid du Colombier static Node *andp; 223e12c5d1SDavid du Colombier static int atorstack[NSTACK]; 233e12c5d1SDavid du Colombier static int* atorp; 243e12c5d1SDavid du Colombier static int cursubid; /* id of current subexpression */ 253e12c5d1SDavid du Colombier static int subidstack[NSTACK]; /* parallel to atorstack */ 263e12c5d1SDavid du Colombier static int* subidp; 273e12c5d1SDavid du Colombier static int lastwasand; /* Last token was operand */ 283e12c5d1SDavid du Colombier static int nbra; 293e12c5d1SDavid du Colombier static char* exprp; /* pointer to next character in source expression */ 303e12c5d1SDavid du Colombier static int lexdone; 313e12c5d1SDavid du Colombier static int nclass; 323e12c5d1SDavid du Colombier static Reclass*classp; 333e12c5d1SDavid du Colombier static Reinst* freep; 343e12c5d1SDavid du Colombier static int errors; 353e12c5d1SDavid du Colombier static Rune yyrune; /* last lex'd rune */ 363e12c5d1SDavid du Colombier static Reclass*yyclassp; /* last lex'd class */ 373e12c5d1SDavid du Colombier 383e12c5d1SDavid du Colombier /* predeclared crap */ 393e12c5d1SDavid du Colombier static void operator(int); 403e12c5d1SDavid du Colombier static void pushand(Reinst*, Reinst*); 413e12c5d1SDavid du Colombier static void pushator(int); 423e12c5d1SDavid du Colombier static void evaluntil(int); 433e12c5d1SDavid du Colombier static int bldcclass(void); 443e12c5d1SDavid du Colombier 453e12c5d1SDavid du Colombier static jmp_buf regkaboom; 463e12c5d1SDavid du Colombier 473e12c5d1SDavid du Colombier static void 483e12c5d1SDavid du Colombier rcerror(char *s) 493e12c5d1SDavid du Colombier { 503e12c5d1SDavid du Colombier errors++; 513e12c5d1SDavid du Colombier regerror(s); 523e12c5d1SDavid du Colombier longjmp(regkaboom, 1); 533e12c5d1SDavid du Colombier } 543e12c5d1SDavid du Colombier 553e12c5d1SDavid du Colombier static Reinst* 563e12c5d1SDavid du Colombier newinst(int t) 573e12c5d1SDavid du Colombier { 583e12c5d1SDavid du Colombier freep->type = t; 593e12c5d1SDavid du Colombier freep->left = 0; 603e12c5d1SDavid du Colombier freep->right = 0; 613e12c5d1SDavid du Colombier return freep++; 623e12c5d1SDavid du Colombier } 633e12c5d1SDavid du Colombier 643e12c5d1SDavid du Colombier static void 653e12c5d1SDavid du Colombier operand(int t) 663e12c5d1SDavid du Colombier { 673e12c5d1SDavid du Colombier Reinst *i; 683e12c5d1SDavid du Colombier 693e12c5d1SDavid du Colombier if(lastwasand) 703e12c5d1SDavid du Colombier operator(CAT); /* catenate is implicit */ 713e12c5d1SDavid du Colombier i = newinst(t); 723e12c5d1SDavid du Colombier 733e12c5d1SDavid du Colombier if(t == CCLASS || t == NCCLASS) 743e12c5d1SDavid du Colombier i->cp = yyclassp; 753e12c5d1SDavid du Colombier if(t == RUNE) 763e12c5d1SDavid du Colombier i->r = yyrune; 773e12c5d1SDavid du Colombier 783e12c5d1SDavid du Colombier pushand(i, i); 793e12c5d1SDavid du Colombier lastwasand = TRUE; 803e12c5d1SDavid du Colombier } 813e12c5d1SDavid du Colombier 823e12c5d1SDavid du Colombier static void 833e12c5d1SDavid du Colombier operator(int t) 843e12c5d1SDavid du Colombier { 853e12c5d1SDavid du Colombier if(t==RBRA && --nbra<0) 863e12c5d1SDavid du Colombier rcerror("unmatched right paren"); 873e12c5d1SDavid du Colombier if(t==LBRA){ 883e12c5d1SDavid du Colombier if(++cursubid >= NSUBEXP) 893e12c5d1SDavid du Colombier rcerror ("too many subexpressions"); 903e12c5d1SDavid du Colombier nbra++; 913e12c5d1SDavid du Colombier if(lastwasand) 923e12c5d1SDavid du Colombier operator(CAT); 933e12c5d1SDavid du Colombier } else 943e12c5d1SDavid du Colombier evaluntil(t); 953e12c5d1SDavid du Colombier if(t != RBRA) 963e12c5d1SDavid du Colombier pushator(t); 973e12c5d1SDavid du Colombier lastwasand = FALSE; 983e12c5d1SDavid du Colombier if(t==STAR || t==QUEST || t==PLUS || t==RBRA) 993e12c5d1SDavid du Colombier lastwasand = TRUE; /* these look like operands */ 1003e12c5d1SDavid du Colombier } 1013e12c5d1SDavid du Colombier 1023e12c5d1SDavid du Colombier static void 1033e12c5d1SDavid du Colombier regerr2(char *s, int c) 1043e12c5d1SDavid du Colombier { 1053e12c5d1SDavid du Colombier char buf[100]; 1063e12c5d1SDavid du Colombier char *cp = buf; 1073e12c5d1SDavid du Colombier while(*s) 1083e12c5d1SDavid du Colombier *cp++ = *s++; 1093e12c5d1SDavid du Colombier *cp++ = c; 1103e12c5d1SDavid du Colombier *cp = '\0'; 1113e12c5d1SDavid du Colombier rcerror(buf); 1123e12c5d1SDavid du Colombier } 1133e12c5d1SDavid du Colombier 1143e12c5d1SDavid du Colombier static void 1153e12c5d1SDavid du Colombier cant(char *s) 1163e12c5d1SDavid du Colombier { 1173e12c5d1SDavid du Colombier char buf[100]; 1183e12c5d1SDavid du Colombier strcpy(buf, "can't happen: "); 1193e12c5d1SDavid du Colombier strcat(buf, s); 1203e12c5d1SDavid du Colombier rcerror(buf); 1213e12c5d1SDavid du Colombier } 1223e12c5d1SDavid du Colombier 1233e12c5d1SDavid du Colombier static void 1243e12c5d1SDavid du Colombier pushand(Reinst *f, Reinst *l) 1253e12c5d1SDavid du Colombier { 1263e12c5d1SDavid du Colombier if(andp >= &andstack[NSTACK]) 1273e12c5d1SDavid du Colombier cant("operand stack overflow"); 1283e12c5d1SDavid du Colombier andp->first = f; 1293e12c5d1SDavid du Colombier andp->last = l; 1303e12c5d1SDavid du Colombier andp++; 1313e12c5d1SDavid du Colombier } 1323e12c5d1SDavid du Colombier 1333e12c5d1SDavid du Colombier static void 1343e12c5d1SDavid du Colombier pushator(int t) 1353e12c5d1SDavid du Colombier { 1363e12c5d1SDavid du Colombier if(atorp >= &atorstack[NSTACK]) 1373e12c5d1SDavid du Colombier cant("operator stack overflow"); 1383e12c5d1SDavid du Colombier *atorp++ = t; 1393e12c5d1SDavid du Colombier *subidp++ = cursubid; 1403e12c5d1SDavid du Colombier } 1413e12c5d1SDavid du Colombier 1423e12c5d1SDavid du Colombier static Node* 1433e12c5d1SDavid du Colombier popand(int op) 1443e12c5d1SDavid du Colombier { 1453e12c5d1SDavid du Colombier Reinst *inst; 1463e12c5d1SDavid du Colombier 1473e12c5d1SDavid du Colombier if(andp <= &andstack[0]){ 1483e12c5d1SDavid du Colombier regerr2("missing operand for ", op); 1493e12c5d1SDavid du Colombier inst = newinst(NOP); 1503e12c5d1SDavid du Colombier pushand(inst,inst); 1513e12c5d1SDavid du Colombier } 1523e12c5d1SDavid du Colombier return --andp; 1533e12c5d1SDavid du Colombier } 1543e12c5d1SDavid du Colombier 1553e12c5d1SDavid du Colombier static int 1563e12c5d1SDavid du Colombier popator(void) 1573e12c5d1SDavid du Colombier { 1583e12c5d1SDavid du Colombier if(atorp <= &atorstack[0]) 1593e12c5d1SDavid du Colombier cant("operator stack underflow"); 1603e12c5d1SDavid du Colombier --subidp; 1613e12c5d1SDavid du Colombier return *--atorp; 1623e12c5d1SDavid du Colombier } 1633e12c5d1SDavid du Colombier 1643e12c5d1SDavid du Colombier static void 1653e12c5d1SDavid du Colombier evaluntil(int pri) 1663e12c5d1SDavid du Colombier { 1673e12c5d1SDavid du Colombier Node *op1, *op2; 1683e12c5d1SDavid du Colombier Reinst *inst1, *inst2; 1693e12c5d1SDavid du Colombier 1703e12c5d1SDavid du Colombier while(pri==RBRA || atorp[-1]>=pri){ 1713e12c5d1SDavid du Colombier switch(popator()){ 1723e12c5d1SDavid du Colombier default: 1733e12c5d1SDavid du Colombier rcerror("unknown operator in evaluntil"); 1743e12c5d1SDavid du Colombier break; 1753e12c5d1SDavid du Colombier case LBRA: /* must have been RBRA */ 1763e12c5d1SDavid du Colombier op1 = popand('('); 1773e12c5d1SDavid du Colombier inst2 = newinst(RBRA); 1783e12c5d1SDavid du Colombier inst2->subid = *subidp; 1793e12c5d1SDavid du Colombier op1->last->next = inst2; 1803e12c5d1SDavid du Colombier inst1 = newinst(LBRA); 1813e12c5d1SDavid du Colombier inst1->subid = *subidp; 1823e12c5d1SDavid du Colombier inst1->next = op1->first; 1833e12c5d1SDavid du Colombier pushand(inst1, inst2); 1843e12c5d1SDavid du Colombier return; 1853e12c5d1SDavid du Colombier case OR: 1863e12c5d1SDavid du Colombier op2 = popand('|'); 1873e12c5d1SDavid du Colombier op1 = popand('|'); 1883e12c5d1SDavid du Colombier inst2 = newinst(NOP); 1893e12c5d1SDavid du Colombier op2->last->next = inst2; 1903e12c5d1SDavid du Colombier op1->last->next = inst2; 1913e12c5d1SDavid du Colombier inst1 = newinst(OR); 1923e12c5d1SDavid du Colombier inst1->right = op1->first; 1933e12c5d1SDavid du Colombier inst1->left = op2->first; 1943e12c5d1SDavid du Colombier pushand(inst1, inst2); 1953e12c5d1SDavid du Colombier break; 1963e12c5d1SDavid du Colombier case CAT: 1973e12c5d1SDavid du Colombier op2 = popand(0); 1983e12c5d1SDavid du Colombier op1 = popand(0); 1993e12c5d1SDavid du Colombier op1->last->next = op2->first; 2003e12c5d1SDavid du Colombier pushand(op1->first, op2->last); 2013e12c5d1SDavid du Colombier break; 2023e12c5d1SDavid du Colombier case STAR: 2033e12c5d1SDavid du Colombier op2 = popand('*'); 2043e12c5d1SDavid du Colombier inst1 = newinst(OR); 2053e12c5d1SDavid du Colombier op2->last->next = inst1; 2063e12c5d1SDavid du Colombier inst1->right = op2->first; 2073e12c5d1SDavid du Colombier pushand(inst1, inst1); 2083e12c5d1SDavid du Colombier break; 2093e12c5d1SDavid du Colombier case PLUS: 2103e12c5d1SDavid du Colombier op2 = popand('+'); 2113e12c5d1SDavid du Colombier inst1 = newinst(OR); 2123e12c5d1SDavid du Colombier op2->last->next = inst1; 2133e12c5d1SDavid du Colombier inst1->right = op2->first; 2143e12c5d1SDavid du Colombier pushand(op2->first, inst1); 2153e12c5d1SDavid du Colombier break; 2163e12c5d1SDavid du Colombier case QUEST: 2173e12c5d1SDavid du Colombier op2 = popand('?'); 2183e12c5d1SDavid du Colombier inst1 = newinst(OR); 2193e12c5d1SDavid du Colombier inst2 = newinst(NOP); 2203e12c5d1SDavid du Colombier inst1->left = inst2; 2213e12c5d1SDavid du Colombier inst1->right = op2->first; 2223e12c5d1SDavid du Colombier op2->last->next = inst2; 2233e12c5d1SDavid du Colombier pushand(inst1, inst2); 2243e12c5d1SDavid du Colombier break; 2253e12c5d1SDavid du Colombier } 2263e12c5d1SDavid du Colombier } 2273e12c5d1SDavid du Colombier } 2283e12c5d1SDavid du Colombier 2293e12c5d1SDavid du Colombier static Reprog* 2303e12c5d1SDavid du Colombier optimize(Reprog *pp) 2313e12c5d1SDavid du Colombier { 2323e12c5d1SDavid du Colombier Reinst *inst, *target; 2333e12c5d1SDavid du Colombier int size; 2343e12c5d1SDavid du Colombier Reprog *npp; 235*219b2ee8SDavid du Colombier Reclass *cl; 2363e12c5d1SDavid du Colombier int diff; 2373e12c5d1SDavid du Colombier 2383e12c5d1SDavid du Colombier /* 2393e12c5d1SDavid du Colombier * get rid of NOOP chains 2403e12c5d1SDavid du Colombier */ 2413e12c5d1SDavid du Colombier for(inst=pp->firstinst; inst->type!=END; inst++){ 2423e12c5d1SDavid du Colombier target = inst->next; 2433e12c5d1SDavid du Colombier while(target->type == NOP) 2443e12c5d1SDavid du Colombier target = target->next; 2453e12c5d1SDavid du Colombier inst->next = target; 2463e12c5d1SDavid du Colombier } 2473e12c5d1SDavid du Colombier 2483e12c5d1SDavid du Colombier /* 2493e12c5d1SDavid du Colombier * The original allocation is for an area larger than 2503e12c5d1SDavid du Colombier * necessary. Reallocate to the actual space used 2513e12c5d1SDavid du Colombier * and then relocate the code. 2523e12c5d1SDavid du Colombier */ 2533e12c5d1SDavid du Colombier size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); 2543e12c5d1SDavid du Colombier npp = realloc(pp, size); 2553e12c5d1SDavid du Colombier if(npp==0 || npp==pp) 2563e12c5d1SDavid du Colombier return pp; 2573e12c5d1SDavid du Colombier diff = (char *)npp - (char *)pp; 2583e12c5d1SDavid du Colombier freep = (Reinst *)((char *)freep + diff); 2593e12c5d1SDavid du Colombier for(inst=npp->firstinst; inst<freep; inst++){ 2603e12c5d1SDavid du Colombier switch(inst->type){ 2613e12c5d1SDavid du Colombier case OR: 2623e12c5d1SDavid du Colombier case STAR: 2633e12c5d1SDavid du Colombier case PLUS: 2643e12c5d1SDavid du Colombier case QUEST: 265*219b2ee8SDavid du Colombier *(char **)&inst->right += diff; 266*219b2ee8SDavid du Colombier break; 2673e12c5d1SDavid du Colombier case CCLASS: 2683e12c5d1SDavid du Colombier case NCCLASS: 2693e12c5d1SDavid du Colombier *(char **)&inst->right += diff; 270*219b2ee8SDavid du Colombier cl = inst->cp; 271*219b2ee8SDavid du Colombier *(char **)&cl->end += diff; 2723e12c5d1SDavid du Colombier break; 2733e12c5d1SDavid du Colombier } 2743e12c5d1SDavid du Colombier *(char **)&inst->left += diff; 2753e12c5d1SDavid du Colombier } 2763e12c5d1SDavid du Colombier *(char **)&npp->startinst += diff; 2773e12c5d1SDavid du Colombier return npp; 2783e12c5d1SDavid du Colombier } 2793e12c5d1SDavid du Colombier 2803e12c5d1SDavid du Colombier #ifdef DEBUG 2813e12c5d1SDavid du Colombier static void 2823e12c5d1SDavid du Colombier dumpstack(void){ 2833e12c5d1SDavid du Colombier Node *stk; 2843e12c5d1SDavid du Colombier int *ip; 2853e12c5d1SDavid du Colombier 2863e12c5d1SDavid du Colombier print("operators\n"); 2873e12c5d1SDavid du Colombier for(ip=atorstack; ip<atorp; ip++) 2883e12c5d1SDavid du Colombier print("0%o\n", *ip); 2893e12c5d1SDavid du Colombier print("operands\n"); 2903e12c5d1SDavid du Colombier for(stk=andstack; stk<andp; stk++) 2913e12c5d1SDavid du Colombier print("0%o\t0%o\n", stk->first->type, stk->last->type); 2923e12c5d1SDavid du Colombier } 2933e12c5d1SDavid du Colombier 2943e12c5d1SDavid du Colombier static void 2953e12c5d1SDavid du Colombier dump(Reprog *pp) 2963e12c5d1SDavid du Colombier { 2973e12c5d1SDavid du Colombier Reinst *l; 2983e12c5d1SDavid du Colombier Rune *p; 2993e12c5d1SDavid du Colombier 3003e12c5d1SDavid du Colombier l = pp->firstinst; 3013e12c5d1SDavid du Colombier do{ 3023e12c5d1SDavid du Colombier print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, 3033e12c5d1SDavid du Colombier l->left-pp->firstinst, l->right-pp->firstinst); 3043e12c5d1SDavid du Colombier if(l->type == RUNE) 3053e12c5d1SDavid du Colombier print("\t%C\n", l->r); 3063e12c5d1SDavid du Colombier else if(l->type == CCLASS || l->type == NCCLASS){ 3073e12c5d1SDavid du Colombier print("\t["); 3083e12c5d1SDavid du Colombier if(l->type == NCCLASS) 3093e12c5d1SDavid du Colombier print("^"); 3103e12c5d1SDavid du Colombier for(p = l->cp->spans; p < l->cp->end; p += 2) 3113e12c5d1SDavid du Colombier if(p[0] == p[1]) 3123e12c5d1SDavid du Colombier print("%C", p[0]); 3133e12c5d1SDavid du Colombier else 3143e12c5d1SDavid du Colombier print("%C-%C", p[0], p[1]); 3153e12c5d1SDavid du Colombier print("]\n"); 3163e12c5d1SDavid du Colombier } else 3173e12c5d1SDavid du Colombier print("\n"); 3183e12c5d1SDavid du Colombier }while(l++->type); 3193e12c5d1SDavid du Colombier } 3203e12c5d1SDavid du Colombier #endif 3213e12c5d1SDavid du Colombier 3223e12c5d1SDavid du Colombier static Reclass* 3233e12c5d1SDavid du Colombier newclass(void) 3243e12c5d1SDavid du Colombier { 3253e12c5d1SDavid du Colombier if(nclass >= NCLASS) 3263e12c5d1SDavid du Colombier regerr2("too many character classes; limit", NCLASS+'0'); 3273e12c5d1SDavid du Colombier return &(classp[nclass++]); 3283e12c5d1SDavid du Colombier } 3293e12c5d1SDavid du Colombier 3303e12c5d1SDavid du Colombier static int 3313e12c5d1SDavid du Colombier nextc(Rune *rp) 3323e12c5d1SDavid du Colombier { 3333e12c5d1SDavid du Colombier if(lexdone){ 3343e12c5d1SDavid du Colombier *rp = 0; 3353e12c5d1SDavid du Colombier return 1; 3363e12c5d1SDavid du Colombier } 3373e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp); 3383e12c5d1SDavid du Colombier if(*rp == L'\\'){ 3393e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp); 3403e12c5d1SDavid du Colombier return 1; 3413e12c5d1SDavid du Colombier } 3423e12c5d1SDavid du Colombier if(*rp == 0) 3433e12c5d1SDavid du Colombier lexdone = 1; 3443e12c5d1SDavid du Colombier return 0; 3453e12c5d1SDavid du Colombier } 3463e12c5d1SDavid du Colombier 3473e12c5d1SDavid du Colombier static int 3483e12c5d1SDavid du Colombier lex(int literal, int dot_type) 3493e12c5d1SDavid du Colombier { 3503e12c5d1SDavid du Colombier int quoted; 3513e12c5d1SDavid du Colombier 3523e12c5d1SDavid du Colombier quoted = nextc(&yyrune); 3533e12c5d1SDavid du Colombier if(literal || quoted){ 3543e12c5d1SDavid du Colombier if(yyrune == 0) 3553e12c5d1SDavid du Colombier return END; 3563e12c5d1SDavid du Colombier return RUNE; 3573e12c5d1SDavid du Colombier } 3583e12c5d1SDavid du Colombier 3593e12c5d1SDavid du Colombier switch(yyrune){ 3603e12c5d1SDavid du Colombier case 0: 3613e12c5d1SDavid du Colombier return END; 3623e12c5d1SDavid du Colombier case L'*': 3633e12c5d1SDavid du Colombier return STAR; 3643e12c5d1SDavid du Colombier case L'?': 3653e12c5d1SDavid du Colombier return QUEST; 3663e12c5d1SDavid du Colombier case L'+': 3673e12c5d1SDavid du Colombier return PLUS; 3683e12c5d1SDavid du Colombier case L'|': 3693e12c5d1SDavid du Colombier return OR; 3703e12c5d1SDavid du Colombier case L'.': 3713e12c5d1SDavid du Colombier return dot_type; 3723e12c5d1SDavid du Colombier case L'(': 3733e12c5d1SDavid du Colombier return LBRA; 3743e12c5d1SDavid du Colombier case L')': 3753e12c5d1SDavid du Colombier return RBRA; 3763e12c5d1SDavid du Colombier case L'^': 3773e12c5d1SDavid du Colombier return BOL; 3783e12c5d1SDavid du Colombier case L'$': 3793e12c5d1SDavid du Colombier return EOL; 3803e12c5d1SDavid du Colombier case L'[': 3813e12c5d1SDavid du Colombier return bldcclass(); 3823e12c5d1SDavid du Colombier } 3833e12c5d1SDavid du Colombier return RUNE; 3843e12c5d1SDavid du Colombier } 3853e12c5d1SDavid du Colombier 3863e12c5d1SDavid du Colombier static int 3873e12c5d1SDavid du Colombier bldcclass(void) 3883e12c5d1SDavid du Colombier { 3893e12c5d1SDavid du Colombier int type; 3903e12c5d1SDavid du Colombier Rune r[NCCRUNE]; 3913e12c5d1SDavid du Colombier Rune *p, *ep, *np; 3923e12c5d1SDavid du Colombier Rune rune; 3933e12c5d1SDavid du Colombier int quoted; 3943e12c5d1SDavid du Colombier 3953e12c5d1SDavid du Colombier /* we have already seen the '[' */ 3963e12c5d1SDavid du Colombier type = CCLASS; 3973e12c5d1SDavid du Colombier yyclassp = newclass(); 3983e12c5d1SDavid du Colombier 3993e12c5d1SDavid du Colombier /* look ahead for negation */ 4003e12c5d1SDavid du Colombier /* SPECIAL CASE!!! negated classes don't match \n */ 4013e12c5d1SDavid du Colombier ep = r; 4023e12c5d1SDavid du Colombier quoted = nextc(&rune); 4033e12c5d1SDavid du Colombier if(!quoted && rune == L'^'){ 4043e12c5d1SDavid du Colombier type = NCCLASS; 4053e12c5d1SDavid du Colombier quoted = nextc(&rune); 4063e12c5d1SDavid du Colombier *ep++ = L'\n'; 4073e12c5d1SDavid du Colombier *ep++ = L'\n'; 4083e12c5d1SDavid du Colombier } 4093e12c5d1SDavid du Colombier 4103e12c5d1SDavid du Colombier /* parse class into a set of spans */ 4113e12c5d1SDavid du Colombier for(; ep<&r[NCCRUNE];){ 4123e12c5d1SDavid du Colombier if(rune == 0){ 4133e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4143e12c5d1SDavid du Colombier return 0; 4153e12c5d1SDavid du Colombier } 4163e12c5d1SDavid du Colombier if(!quoted && rune == L']') 4173e12c5d1SDavid du Colombier break; 4183e12c5d1SDavid du Colombier if(!quoted && rune == L'-'){ 4193e12c5d1SDavid du Colombier if(ep == r){ 4203e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4213e12c5d1SDavid du Colombier return 0; 4223e12c5d1SDavid du Colombier } 4233e12c5d1SDavid du Colombier quoted = nextc(&rune); 4243e12c5d1SDavid du Colombier if((!quoted && rune == L']') || rune == 0){ 4253e12c5d1SDavid du Colombier rcerror("malformed '[]'"); 4263e12c5d1SDavid du Colombier return 0; 4273e12c5d1SDavid du Colombier } 4283e12c5d1SDavid du Colombier *(ep-1) = rune; 4293e12c5d1SDavid du Colombier } else { 4303e12c5d1SDavid du Colombier *ep++ = rune; 4313e12c5d1SDavid du Colombier *ep++ = rune; 4323e12c5d1SDavid du Colombier } 4333e12c5d1SDavid du Colombier quoted = nextc(&rune); 4343e12c5d1SDavid du Colombier } 4353e12c5d1SDavid du Colombier 4363e12c5d1SDavid du Colombier /* sort on span start */ 4373e12c5d1SDavid du Colombier for(p = r; p < ep; p += 2){ 4383e12c5d1SDavid du Colombier for(np = p; np < ep; np += 2) 4393e12c5d1SDavid du Colombier if(*np < *p){ 4403e12c5d1SDavid du Colombier rune = np[0]; 4413e12c5d1SDavid du Colombier np[0] = p[0]; 4423e12c5d1SDavid du Colombier p[0] = rune; 4433e12c5d1SDavid du Colombier rune = np[1]; 4443e12c5d1SDavid du Colombier np[1] = p[1]; 4453e12c5d1SDavid du Colombier p[1] = rune; 4463e12c5d1SDavid du Colombier } 4473e12c5d1SDavid du Colombier } 4483e12c5d1SDavid du Colombier 4493e12c5d1SDavid du Colombier /* merge spans */ 4503e12c5d1SDavid du Colombier np = yyclassp->spans; 4513e12c5d1SDavid du Colombier p = r; 4523e12c5d1SDavid du Colombier if(r == ep) 4533e12c5d1SDavid du Colombier yyclassp->end = np; 4543e12c5d1SDavid du Colombier else { 4553e12c5d1SDavid du Colombier np[0] = *p++; 4563e12c5d1SDavid du Colombier np[1] = *p++; 4573e12c5d1SDavid du Colombier for(; p < ep; p += 2) 4583e12c5d1SDavid du Colombier if(p[0] <= np[1]){ 4593e12c5d1SDavid du Colombier if(p[1] > np[1]) 4603e12c5d1SDavid du Colombier np[1] = p[1]; 4613e12c5d1SDavid du Colombier } else { 4623e12c5d1SDavid du Colombier np += 2; 4633e12c5d1SDavid du Colombier np[0] = p[0]; 4643e12c5d1SDavid du Colombier np[1] = p[1]; 4653e12c5d1SDavid du Colombier } 4663e12c5d1SDavid du Colombier yyclassp->end = np+2; 4673e12c5d1SDavid du Colombier } 4683e12c5d1SDavid du Colombier 4693e12c5d1SDavid du Colombier return type; 4703e12c5d1SDavid du Colombier } 4713e12c5d1SDavid du Colombier 4723e12c5d1SDavid du Colombier static Reprog* 4733e12c5d1SDavid du Colombier regcomp1(char *s, int literal, int dot_type) 4743e12c5d1SDavid du Colombier { 4753e12c5d1SDavid du Colombier int token; 4763e12c5d1SDavid du Colombier Reprog *pp; 4773e12c5d1SDavid du Colombier 4783e12c5d1SDavid du Colombier /* get memory for the program */ 4793e12c5d1SDavid du Colombier pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); 4803e12c5d1SDavid du Colombier if(pp == 0){ 4813e12c5d1SDavid du Colombier regerror("out of memory"); 4823e12c5d1SDavid du Colombier return 0; 4833e12c5d1SDavid du Colombier } 4843e12c5d1SDavid du Colombier freep = pp->firstinst; 4853e12c5d1SDavid du Colombier classp = pp->class; 4863e12c5d1SDavid du Colombier errors = 0; 4873e12c5d1SDavid du Colombier 4883e12c5d1SDavid du Colombier if(setjmp(regkaboom)) 4893e12c5d1SDavid du Colombier goto out; 4903e12c5d1SDavid du Colombier 4913e12c5d1SDavid du Colombier /* go compile the sucker */ 4923e12c5d1SDavid du Colombier lexdone = 0; 4933e12c5d1SDavid du Colombier exprp = s; 4943e12c5d1SDavid du Colombier nclass = 0; 4953e12c5d1SDavid du Colombier nbra = 0; 4963e12c5d1SDavid du Colombier atorp = atorstack; 4973e12c5d1SDavid du Colombier andp = andstack; 4983e12c5d1SDavid du Colombier subidp = subidstack; 4993e12c5d1SDavid du Colombier lastwasand = FALSE; 5003e12c5d1SDavid du Colombier cursubid = 0; 5013e12c5d1SDavid du Colombier 5023e12c5d1SDavid du Colombier /* Start with a low priority operator to prime parser */ 5033e12c5d1SDavid du Colombier pushator(START-1); 5043e12c5d1SDavid du Colombier while((token = lex(literal, dot_type)) != END){ 5053e12c5d1SDavid du Colombier if((token&0300) == OPERATOR) 5063e12c5d1SDavid du Colombier operator(token); 5073e12c5d1SDavid du Colombier else 5083e12c5d1SDavid du Colombier operand(token); 5093e12c5d1SDavid du Colombier } 5103e12c5d1SDavid du Colombier 5113e12c5d1SDavid du Colombier /* Close with a low priority operator */ 5123e12c5d1SDavid du Colombier evaluntil(START); 5133e12c5d1SDavid du Colombier 5143e12c5d1SDavid du Colombier /* Force END */ 5153e12c5d1SDavid du Colombier operand(END); 5163e12c5d1SDavid du Colombier evaluntil(START); 5173e12c5d1SDavid du Colombier #ifdef DEBUG 5183e12c5d1SDavid du Colombier dumpstack(); 5193e12c5d1SDavid du Colombier #endif 5203e12c5d1SDavid du Colombier if(nbra) 5213e12c5d1SDavid du Colombier rcerror("unmatched left paren"); 5223e12c5d1SDavid du Colombier --andp; /* points to first and only operand */ 5233e12c5d1SDavid du Colombier pp->startinst = andp->first; 5243e12c5d1SDavid du Colombier #ifdef DEBUG 5253e12c5d1SDavid du Colombier dump(pp); 5263e12c5d1SDavid du Colombier #endif 5273e12c5d1SDavid du Colombier pp = optimize(pp); 5283e12c5d1SDavid du Colombier #ifdef DEBUG 5293e12c5d1SDavid du Colombier print("start: %d\n", andp->first-pp->firstinst); 5303e12c5d1SDavid du Colombier dump(pp); 5313e12c5d1SDavid du Colombier #endif 5323e12c5d1SDavid du Colombier out: 5333e12c5d1SDavid du Colombier if(errors){ 5343e12c5d1SDavid du Colombier free(pp); 5353e12c5d1SDavid du Colombier pp = 0; 5363e12c5d1SDavid du Colombier } 5373e12c5d1SDavid du Colombier return pp; 5383e12c5d1SDavid du Colombier } 5393e12c5d1SDavid du Colombier 5403e12c5d1SDavid du Colombier extern Reprog* 5413e12c5d1SDavid du Colombier regcomp(char *s) 5423e12c5d1SDavid du Colombier { 5433e12c5d1SDavid du Colombier return regcomp1(s, 0, ANY); 5443e12c5d1SDavid du Colombier } 5453e12c5d1SDavid du Colombier 5463e12c5d1SDavid du Colombier extern Reprog* 5473e12c5d1SDavid du Colombier regcomplit(char *s) 5483e12c5d1SDavid du Colombier { 5493e12c5d1SDavid du Colombier return regcomp1(s, 1, ANY); 5503e12c5d1SDavid du Colombier } 5513e12c5d1SDavid du Colombier 5523e12c5d1SDavid du Colombier extern Reprog* 5533e12c5d1SDavid du Colombier regcompnl(char *s) 5543e12c5d1SDavid du Colombier { 5553e12c5d1SDavid du Colombier return regcomp1(s, 0, ANYNL); 5563e12c5d1SDavid du Colombier } 557