13e12c5d1SDavid du Colombier #include <u.h>
23e12c5d1SDavid du Colombier #include <libc.h>
33e12c5d1SDavid du Colombier #include "regexp.h"
43e12c5d1SDavid du Colombier #include "regcomp.h"
53e12c5d1SDavid du Colombier
63e12c5d1SDavid du Colombier #define TRUE 1
73e12c5d1SDavid du Colombier #define FALSE 0
83e12c5d1SDavid du Colombier
93e12c5d1SDavid du Colombier /*
103e12c5d1SDavid du Colombier * Parser Information
113e12c5d1SDavid du Colombier */
123e12c5d1SDavid du Colombier typedef
133e12c5d1SDavid du Colombier struct Node
143e12c5d1SDavid du Colombier {
153e12c5d1SDavid du Colombier Reinst* first;
163e12c5d1SDavid du Colombier Reinst* last;
173e12c5d1SDavid du Colombier }Node;
183e12c5d1SDavid du Colombier
19*3468a491SDavid du Colombier /* max character classes per program is nelem(reprog->class) */
20*3468a491SDavid du Colombier static Reprog *reprog;
21*3468a491SDavid du Colombier
22*3468a491SDavid du Colombier /* max rune ranges per character class is nelem(classp->spans)/2 */
23*3468a491SDavid du Colombier #define NCCRUNE nelem(classp->spans)
24*3468a491SDavid du Colombier
253e12c5d1SDavid du Colombier #define NSTACK 20
263e12c5d1SDavid du Colombier static Node andstack[NSTACK];
273e12c5d1SDavid du Colombier static Node *andp;
283e12c5d1SDavid du Colombier static int atorstack[NSTACK];
293e12c5d1SDavid du Colombier static int* atorp;
303e12c5d1SDavid du Colombier static int cursubid; /* id of current subexpression */
313e12c5d1SDavid du Colombier static int subidstack[NSTACK]; /* parallel to atorstack */
323e12c5d1SDavid du Colombier static int* subidp;
333e12c5d1SDavid du Colombier static int lastwasand; /* Last token was operand */
343e12c5d1SDavid du Colombier static int nbra;
353e12c5d1SDavid du Colombier static char* exprp; /* pointer to next character in source expression */
363e12c5d1SDavid du Colombier static int lexdone;
373e12c5d1SDavid du Colombier static int nclass;
383e12c5d1SDavid du Colombier static Reclass*classp;
393e12c5d1SDavid du Colombier static Reinst* freep;
403e12c5d1SDavid du Colombier static int errors;
413e12c5d1SDavid du Colombier static Rune yyrune; /* last lex'd rune */
423e12c5d1SDavid du Colombier static Reclass*yyclassp; /* last lex'd class */
433e12c5d1SDavid du Colombier
443e12c5d1SDavid du Colombier /* predeclared crap */
453e12c5d1SDavid du Colombier static void operator(int);
463e12c5d1SDavid du Colombier static void pushand(Reinst*, Reinst*);
473e12c5d1SDavid du Colombier static void pushator(int);
483e12c5d1SDavid du Colombier static void evaluntil(int);
493e12c5d1SDavid du Colombier static int bldcclass(void);
503e12c5d1SDavid du Colombier
513e12c5d1SDavid du Colombier static jmp_buf regkaboom;
523e12c5d1SDavid du Colombier
533e12c5d1SDavid du Colombier static void
rcerror(char * s)543e12c5d1SDavid du Colombier rcerror(char *s)
553e12c5d1SDavid du Colombier {
563e12c5d1SDavid du Colombier errors++;
573e12c5d1SDavid du Colombier regerror(s);
583e12c5d1SDavid du Colombier longjmp(regkaboom, 1);
593e12c5d1SDavid du Colombier }
603e12c5d1SDavid du Colombier
613e12c5d1SDavid du Colombier static Reinst*
newinst(int t)623e12c5d1SDavid du Colombier newinst(int t)
633e12c5d1SDavid du Colombier {
643e12c5d1SDavid du Colombier freep->type = t;
653e12c5d1SDavid du Colombier freep->left = 0;
663e12c5d1SDavid du Colombier freep->right = 0;
673e12c5d1SDavid du Colombier return freep++;
683e12c5d1SDavid du Colombier }
693e12c5d1SDavid du Colombier
703e12c5d1SDavid du Colombier static void
operand(int t)713e12c5d1SDavid du Colombier operand(int t)
723e12c5d1SDavid du Colombier {
733e12c5d1SDavid du Colombier Reinst *i;
743e12c5d1SDavid du Colombier
753e12c5d1SDavid du Colombier if(lastwasand)
763e12c5d1SDavid du Colombier operator(CAT); /* catenate is implicit */
773e12c5d1SDavid du Colombier i = newinst(t);
783e12c5d1SDavid du Colombier
793e12c5d1SDavid du Colombier if(t == CCLASS || t == NCCLASS)
803e12c5d1SDavid du Colombier i->cp = yyclassp;
813e12c5d1SDavid du Colombier if(t == RUNE)
823e12c5d1SDavid du Colombier i->r = yyrune;
833e12c5d1SDavid du Colombier
843e12c5d1SDavid du Colombier pushand(i, i);
853e12c5d1SDavid du Colombier lastwasand = TRUE;
863e12c5d1SDavid du Colombier }
873e12c5d1SDavid du Colombier
883e12c5d1SDavid du Colombier static void
operator(int t)893e12c5d1SDavid du Colombier operator(int t)
903e12c5d1SDavid du Colombier {
913e12c5d1SDavid du Colombier if(t==RBRA && --nbra<0)
923e12c5d1SDavid du Colombier rcerror("unmatched right paren");
933e12c5d1SDavid du Colombier if(t==LBRA){
943e12c5d1SDavid du Colombier if(++cursubid >= NSUBEXP)
953e12c5d1SDavid du Colombier rcerror ("too many subexpressions");
963e12c5d1SDavid du Colombier nbra++;
973e12c5d1SDavid du Colombier if(lastwasand)
983e12c5d1SDavid du Colombier operator(CAT);
993e12c5d1SDavid du Colombier } else
1003e12c5d1SDavid du Colombier evaluntil(t);
1013e12c5d1SDavid du Colombier if(t != RBRA)
1023e12c5d1SDavid du Colombier pushator(t);
1033e12c5d1SDavid du Colombier lastwasand = FALSE;
1043e12c5d1SDavid du Colombier if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
1053e12c5d1SDavid du Colombier lastwasand = TRUE; /* these look like operands */
1063e12c5d1SDavid du Colombier }
1073e12c5d1SDavid du Colombier
1083e12c5d1SDavid du Colombier static void
regerr2(char * s,int c)1093e12c5d1SDavid du Colombier regerr2(char *s, int c)
1103e12c5d1SDavid du Colombier {
1113e12c5d1SDavid du Colombier char buf[100];
1123e12c5d1SDavid du Colombier char *cp = buf;
1133e12c5d1SDavid du Colombier while(*s)
1143e12c5d1SDavid du Colombier *cp++ = *s++;
1153e12c5d1SDavid du Colombier *cp++ = c;
1163e12c5d1SDavid du Colombier *cp = '\0';
1173e12c5d1SDavid du Colombier rcerror(buf);
1183e12c5d1SDavid du Colombier }
1193e12c5d1SDavid du Colombier
1203e12c5d1SDavid du Colombier static void
cant(char * s)1213e12c5d1SDavid du Colombier cant(char *s)
1223e12c5d1SDavid du Colombier {
1233e12c5d1SDavid du Colombier char buf[100];
1243e12c5d1SDavid du Colombier strcpy(buf, "can't happen: ");
1253e12c5d1SDavid du Colombier strcat(buf, s);
1263e12c5d1SDavid du Colombier rcerror(buf);
1273e12c5d1SDavid du Colombier }
1283e12c5d1SDavid du Colombier
1293e12c5d1SDavid du Colombier static void
pushand(Reinst * f,Reinst * l)1303e12c5d1SDavid du Colombier pushand(Reinst *f, Reinst *l)
1313e12c5d1SDavid du Colombier {
1323e12c5d1SDavid du Colombier if(andp >= &andstack[NSTACK])
1333e12c5d1SDavid du Colombier cant("operand stack overflow");
1343e12c5d1SDavid du Colombier andp->first = f;
1353e12c5d1SDavid du Colombier andp->last = l;
1363e12c5d1SDavid du Colombier andp++;
1373e12c5d1SDavid du Colombier }
1383e12c5d1SDavid du Colombier
1393e12c5d1SDavid du Colombier static void
pushator(int t)1403e12c5d1SDavid du Colombier pushator(int t)
1413e12c5d1SDavid du Colombier {
1423e12c5d1SDavid du Colombier if(atorp >= &atorstack[NSTACK])
1433e12c5d1SDavid du Colombier cant("operator stack overflow");
1443e12c5d1SDavid du Colombier *atorp++ = t;
1453e12c5d1SDavid du Colombier *subidp++ = cursubid;
1463e12c5d1SDavid du Colombier }
1473e12c5d1SDavid du Colombier
1483e12c5d1SDavid du Colombier static Node*
popand(int op)1493e12c5d1SDavid du Colombier popand(int op)
1503e12c5d1SDavid du Colombier {
1513e12c5d1SDavid du Colombier Reinst *inst;
1523e12c5d1SDavid du Colombier
1533e12c5d1SDavid du Colombier if(andp <= &andstack[0]){
1543e12c5d1SDavid du Colombier regerr2("missing operand for ", op);
1553e12c5d1SDavid du Colombier inst = newinst(NOP);
1563e12c5d1SDavid du Colombier pushand(inst,inst);
1573e12c5d1SDavid du Colombier }
1583e12c5d1SDavid du Colombier return --andp;
1593e12c5d1SDavid du Colombier }
1603e12c5d1SDavid du Colombier
1613e12c5d1SDavid du Colombier static int
popator(void)1623e12c5d1SDavid du Colombier popator(void)
1633e12c5d1SDavid du Colombier {
1643e12c5d1SDavid du Colombier if(atorp <= &atorstack[0])
1653e12c5d1SDavid du Colombier cant("operator stack underflow");
1663e12c5d1SDavid du Colombier --subidp;
1673e12c5d1SDavid du Colombier return *--atorp;
1683e12c5d1SDavid du Colombier }
1693e12c5d1SDavid du Colombier
1703e12c5d1SDavid du Colombier static void
evaluntil(int pri)1713e12c5d1SDavid du Colombier evaluntil(int pri)
1723e12c5d1SDavid du Colombier {
1733e12c5d1SDavid du Colombier Node *op1, *op2;
1743e12c5d1SDavid du Colombier Reinst *inst1, *inst2;
1753e12c5d1SDavid du Colombier
1763e12c5d1SDavid du Colombier while(pri==RBRA || atorp[-1]>=pri){
1773e12c5d1SDavid du Colombier switch(popator()){
1783e12c5d1SDavid du Colombier default:
1793e12c5d1SDavid du Colombier rcerror("unknown operator in evaluntil");
1803e12c5d1SDavid du Colombier break;
1813e12c5d1SDavid du Colombier case LBRA: /* must have been RBRA */
1823e12c5d1SDavid du Colombier op1 = popand('(');
1833e12c5d1SDavid du Colombier inst2 = newinst(RBRA);
1843e12c5d1SDavid du Colombier inst2->subid = *subidp;
1853e12c5d1SDavid du Colombier op1->last->next = inst2;
1863e12c5d1SDavid du Colombier inst1 = newinst(LBRA);
1873e12c5d1SDavid du Colombier inst1->subid = *subidp;
1883e12c5d1SDavid du Colombier inst1->next = op1->first;
1893e12c5d1SDavid du Colombier pushand(inst1, inst2);
1903e12c5d1SDavid du Colombier return;
1913e12c5d1SDavid du Colombier case OR:
1923e12c5d1SDavid du Colombier op2 = popand('|');
1933e12c5d1SDavid du Colombier op1 = popand('|');
1943e12c5d1SDavid du Colombier inst2 = newinst(NOP);
1953e12c5d1SDavid du Colombier op2->last->next = inst2;
1963e12c5d1SDavid du Colombier op1->last->next = inst2;
1973e12c5d1SDavid du Colombier inst1 = newinst(OR);
1983e12c5d1SDavid du Colombier inst1->right = op1->first;
1993e12c5d1SDavid du Colombier inst1->left = op2->first;
2003e12c5d1SDavid du Colombier pushand(inst1, inst2);
2013e12c5d1SDavid du Colombier break;
2023e12c5d1SDavid du Colombier case CAT:
2033e12c5d1SDavid du Colombier op2 = popand(0);
2043e12c5d1SDavid du Colombier op1 = popand(0);
2053e12c5d1SDavid du Colombier op1->last->next = op2->first;
2063e12c5d1SDavid du Colombier pushand(op1->first, op2->last);
2073e12c5d1SDavid du Colombier break;
2083e12c5d1SDavid du Colombier case STAR:
2093e12c5d1SDavid du Colombier op2 = popand('*');
2103e12c5d1SDavid du Colombier inst1 = newinst(OR);
2113e12c5d1SDavid du Colombier op2->last->next = inst1;
2123e12c5d1SDavid du Colombier inst1->right = op2->first;
2133e12c5d1SDavid du Colombier pushand(inst1, inst1);
2143e12c5d1SDavid du Colombier break;
2153e12c5d1SDavid du Colombier case PLUS:
2163e12c5d1SDavid du Colombier op2 = popand('+');
2173e12c5d1SDavid du Colombier inst1 = newinst(OR);
2183e12c5d1SDavid du Colombier op2->last->next = inst1;
2193e12c5d1SDavid du Colombier inst1->right = op2->first;
2203e12c5d1SDavid du Colombier pushand(op2->first, inst1);
2213e12c5d1SDavid du Colombier break;
2223e12c5d1SDavid du Colombier case QUEST:
2233e12c5d1SDavid du Colombier op2 = popand('?');
2243e12c5d1SDavid du Colombier inst1 = newinst(OR);
2253e12c5d1SDavid du Colombier inst2 = newinst(NOP);
2263e12c5d1SDavid du Colombier inst1->left = inst2;
2273e12c5d1SDavid du Colombier inst1->right = op2->first;
2283e12c5d1SDavid du Colombier op2->last->next = inst2;
2293e12c5d1SDavid du Colombier pushand(inst1, inst2);
2303e12c5d1SDavid du Colombier break;
2313e12c5d1SDavid du Colombier }
2323e12c5d1SDavid du Colombier }
2333e12c5d1SDavid du Colombier }
2343e12c5d1SDavid du Colombier
2353e12c5d1SDavid du Colombier static Reprog*
optimize(Reprog * pp)2363e12c5d1SDavid du Colombier optimize(Reprog *pp)
2373e12c5d1SDavid du Colombier {
2383e12c5d1SDavid du Colombier Reinst *inst, *target;
2393e12c5d1SDavid du Colombier int size;
2403e12c5d1SDavid du Colombier Reprog *npp;
241219b2ee8SDavid du Colombier Reclass *cl;
2423e12c5d1SDavid du Colombier int diff;
2433e12c5d1SDavid du Colombier
2443e12c5d1SDavid du Colombier /*
2453e12c5d1SDavid du Colombier * get rid of NOOP chains
2463e12c5d1SDavid du Colombier */
2473e12c5d1SDavid du Colombier for(inst=pp->firstinst; inst->type!=END; inst++){
2483e12c5d1SDavid du Colombier target = inst->next;
2493e12c5d1SDavid du Colombier while(target->type == NOP)
2503e12c5d1SDavid du Colombier target = target->next;
2513e12c5d1SDavid du Colombier inst->next = target;
2523e12c5d1SDavid du Colombier }
2533e12c5d1SDavid du Colombier
2543e12c5d1SDavid du Colombier /*
2553e12c5d1SDavid du Colombier * The original allocation is for an area larger than
2563e12c5d1SDavid du Colombier * necessary. Reallocate to the actual space used
2573e12c5d1SDavid du Colombier * and then relocate the code.
2583e12c5d1SDavid du Colombier */
2593e12c5d1SDavid du Colombier size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
2603e12c5d1SDavid du Colombier npp = realloc(pp, size);
2613e12c5d1SDavid du Colombier if(npp==0 || npp==pp)
2623e12c5d1SDavid du Colombier return pp;
2633e12c5d1SDavid du Colombier diff = (char *)npp - (char *)pp;
2643e12c5d1SDavid du Colombier freep = (Reinst *)((char *)freep + diff);
2653e12c5d1SDavid du Colombier for(inst=npp->firstinst; inst<freep; inst++){
2663e12c5d1SDavid du Colombier switch(inst->type){
2673e12c5d1SDavid du Colombier case OR:
2683e12c5d1SDavid du Colombier case STAR:
2693e12c5d1SDavid du Colombier case PLUS:
2703e12c5d1SDavid du Colombier case QUEST:
271219b2ee8SDavid du Colombier *(char **)&inst->right += diff;
272219b2ee8SDavid du Colombier break;
2733e12c5d1SDavid du Colombier case CCLASS:
2743e12c5d1SDavid du Colombier case NCCLASS:
2753e12c5d1SDavid du Colombier *(char **)&inst->right += diff;
276219b2ee8SDavid du Colombier cl = inst->cp;
277219b2ee8SDavid du Colombier *(char **)&cl->end += diff;
2783e12c5d1SDavid du Colombier break;
2793e12c5d1SDavid du Colombier }
2803e12c5d1SDavid du Colombier *(char **)&inst->left += diff;
2813e12c5d1SDavid du Colombier }
2823e12c5d1SDavid du Colombier *(char **)&npp->startinst += diff;
2833e12c5d1SDavid du Colombier return npp;
2843e12c5d1SDavid du Colombier }
2853e12c5d1SDavid du Colombier
2863e12c5d1SDavid du Colombier #ifdef DEBUG
2873e12c5d1SDavid du Colombier static void
dumpstack(void)2883e12c5d1SDavid du Colombier dumpstack(void){
2893e12c5d1SDavid du Colombier Node *stk;
2903e12c5d1SDavid du Colombier int *ip;
2913e12c5d1SDavid du Colombier
2923e12c5d1SDavid du Colombier print("operators\n");
2933e12c5d1SDavid du Colombier for(ip=atorstack; ip<atorp; ip++)
2943e12c5d1SDavid du Colombier print("0%o\n", *ip);
2953e12c5d1SDavid du Colombier print("operands\n");
2963e12c5d1SDavid du Colombier for(stk=andstack; stk<andp; stk++)
2973e12c5d1SDavid du Colombier print("0%o\t0%o\n", stk->first->type, stk->last->type);
2983e12c5d1SDavid du Colombier }
2993e12c5d1SDavid du Colombier
3003e12c5d1SDavid du Colombier static void
dump(Reprog * pp)3013e12c5d1SDavid du Colombier dump(Reprog *pp)
3023e12c5d1SDavid du Colombier {
3033e12c5d1SDavid du Colombier Reinst *l;
3043e12c5d1SDavid du Colombier Rune *p;
3053e12c5d1SDavid du Colombier
3063e12c5d1SDavid du Colombier l = pp->firstinst;
3073e12c5d1SDavid du Colombier do{
3083e12c5d1SDavid du Colombier print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
3093e12c5d1SDavid du Colombier l->left-pp->firstinst, l->right-pp->firstinst);
3103e12c5d1SDavid du Colombier if(l->type == RUNE)
3113e12c5d1SDavid du Colombier print("\t%C\n", l->r);
3123e12c5d1SDavid du Colombier else if(l->type == CCLASS || l->type == NCCLASS){
3133e12c5d1SDavid du Colombier print("\t[");
3143e12c5d1SDavid du Colombier if(l->type == NCCLASS)
3153e12c5d1SDavid du Colombier print("^");
3163e12c5d1SDavid du Colombier for(p = l->cp->spans; p < l->cp->end; p += 2)
3173e12c5d1SDavid du Colombier if(p[0] == p[1])
3183e12c5d1SDavid du Colombier print("%C", p[0]);
3193e12c5d1SDavid du Colombier else
3203e12c5d1SDavid du Colombier print("%C-%C", p[0], p[1]);
3213e12c5d1SDavid du Colombier print("]\n");
3223e12c5d1SDavid du Colombier } else
3233e12c5d1SDavid du Colombier print("\n");
3243e12c5d1SDavid du Colombier }while(l++->type);
3253e12c5d1SDavid du Colombier }
3263e12c5d1SDavid du Colombier #endif
3273e12c5d1SDavid du Colombier
3283e12c5d1SDavid du Colombier static Reclass*
newclass(void)3293e12c5d1SDavid du Colombier newclass(void)
3303e12c5d1SDavid du Colombier {
331*3468a491SDavid du Colombier if(nclass >= nelem(reprog->class))
332*3468a491SDavid du Colombier rcerror("too many character classes; increase Reprog.class size");
3333e12c5d1SDavid du Colombier return &(classp[nclass++]);
3343e12c5d1SDavid du Colombier }
3353e12c5d1SDavid du Colombier
3363e12c5d1SDavid du Colombier static int
nextc(Rune * rp)3373e12c5d1SDavid du Colombier nextc(Rune *rp)
3383e12c5d1SDavid du Colombier {
3393e12c5d1SDavid du Colombier if(lexdone){
3403e12c5d1SDavid du Colombier *rp = 0;
3413e12c5d1SDavid du Colombier return 1;
3423e12c5d1SDavid du Colombier }
3433e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp);
3443e12c5d1SDavid du Colombier if(*rp == L'\\'){
3453e12c5d1SDavid du Colombier exprp += chartorune(rp, exprp);
3463e12c5d1SDavid du Colombier return 1;
3473e12c5d1SDavid du Colombier }
3483e12c5d1SDavid du Colombier if(*rp == 0)
3493e12c5d1SDavid du Colombier lexdone = 1;
3503e12c5d1SDavid du Colombier return 0;
3513e12c5d1SDavid du Colombier }
3523e12c5d1SDavid du Colombier
3533e12c5d1SDavid du Colombier static int
lex(int literal,int dot_type)3543e12c5d1SDavid du Colombier lex(int literal, int dot_type)
3553e12c5d1SDavid du Colombier {
3563e12c5d1SDavid du Colombier int quoted;
3573e12c5d1SDavid du Colombier
3583e12c5d1SDavid du Colombier quoted = nextc(&yyrune);
3593e12c5d1SDavid du Colombier if(literal || quoted){
3603e12c5d1SDavid du Colombier if(yyrune == 0)
3613e12c5d1SDavid du Colombier return END;
3623e12c5d1SDavid du Colombier return RUNE;
3633e12c5d1SDavid du Colombier }
3643e12c5d1SDavid du Colombier
3653e12c5d1SDavid du Colombier switch(yyrune){
3663e12c5d1SDavid du Colombier case 0:
3673e12c5d1SDavid du Colombier return END;
3683e12c5d1SDavid du Colombier case L'*':
3693e12c5d1SDavid du Colombier return STAR;
3703e12c5d1SDavid du Colombier case L'?':
3713e12c5d1SDavid du Colombier return QUEST;
3723e12c5d1SDavid du Colombier case L'+':
3733e12c5d1SDavid du Colombier return PLUS;
3743e12c5d1SDavid du Colombier case L'|':
3753e12c5d1SDavid du Colombier return OR;
3763e12c5d1SDavid du Colombier case L'.':
3773e12c5d1SDavid du Colombier return dot_type;
3783e12c5d1SDavid du Colombier case L'(':
3793e12c5d1SDavid du Colombier return LBRA;
3803e12c5d1SDavid du Colombier case L')':
3813e12c5d1SDavid du Colombier return RBRA;
3823e12c5d1SDavid du Colombier case L'^':
3833e12c5d1SDavid du Colombier return BOL;
3843e12c5d1SDavid du Colombier case L'$':
3853e12c5d1SDavid du Colombier return EOL;
3863e12c5d1SDavid du Colombier case L'[':
3873e12c5d1SDavid du Colombier return bldcclass();
3883e12c5d1SDavid du Colombier }
3893e12c5d1SDavid du Colombier return RUNE;
3903e12c5d1SDavid du Colombier }
3913e12c5d1SDavid du Colombier
3923e12c5d1SDavid du Colombier static int
bldcclass(void)3933e12c5d1SDavid du Colombier bldcclass(void)
3943e12c5d1SDavid du Colombier {
3953e12c5d1SDavid du Colombier int type;
3963e12c5d1SDavid du Colombier Rune r[NCCRUNE];
3973e12c5d1SDavid du Colombier Rune *p, *ep, *np;
3983e12c5d1SDavid du Colombier Rune rune;
3993e12c5d1SDavid du Colombier int quoted;
4003e12c5d1SDavid du Colombier
4013e12c5d1SDavid du Colombier /* we have already seen the '[' */
4023e12c5d1SDavid du Colombier type = CCLASS;
4033e12c5d1SDavid du Colombier yyclassp = newclass();
4043e12c5d1SDavid du Colombier
4053e12c5d1SDavid du Colombier /* look ahead for negation */
4063e12c5d1SDavid du Colombier /* SPECIAL CASE!!! negated classes don't match \n */
4073e12c5d1SDavid du Colombier ep = r;
4083e12c5d1SDavid du Colombier quoted = nextc(&rune);
4093e12c5d1SDavid du Colombier if(!quoted && rune == L'^'){
4103e12c5d1SDavid du Colombier type = NCCLASS;
4113e12c5d1SDavid du Colombier quoted = nextc(&rune);
4123e12c5d1SDavid du Colombier *ep++ = L'\n';
4133e12c5d1SDavid du Colombier *ep++ = L'\n';
4143e12c5d1SDavid du Colombier }
4153e12c5d1SDavid du Colombier
4163e12c5d1SDavid du Colombier /* parse class into a set of spans */
417*3468a491SDavid du Colombier while(ep < &r[NCCRUNE-1]){
4183e12c5d1SDavid du Colombier if(rune == 0){
4193e12c5d1SDavid du Colombier rcerror("malformed '[]'");
4203e12c5d1SDavid du Colombier return 0;
4213e12c5d1SDavid du Colombier }
4223e12c5d1SDavid du Colombier if(!quoted && rune == L']')
4233e12c5d1SDavid du Colombier break;
4243e12c5d1SDavid du Colombier if(!quoted && rune == L'-'){
4253e12c5d1SDavid du Colombier if(ep == r){
4263e12c5d1SDavid du Colombier rcerror("malformed '[]'");
4273e12c5d1SDavid du Colombier return 0;
4283e12c5d1SDavid du Colombier }
4293e12c5d1SDavid du Colombier quoted = nextc(&rune);
4303e12c5d1SDavid du Colombier if((!quoted && rune == L']') || rune == 0){
4313e12c5d1SDavid du Colombier rcerror("malformed '[]'");
4323e12c5d1SDavid du Colombier return 0;
4333e12c5d1SDavid du Colombier }
4343e12c5d1SDavid du Colombier *(ep-1) = rune;
4353e12c5d1SDavid du Colombier } else {
4363e12c5d1SDavid du Colombier *ep++ = rune;
4373e12c5d1SDavid du Colombier *ep++ = rune;
4383e12c5d1SDavid du Colombier }
4393e12c5d1SDavid du Colombier quoted = nextc(&rune);
4403e12c5d1SDavid du Colombier }
441*3468a491SDavid du Colombier if(ep >= &r[NCCRUNE-1]) {
442*3468a491SDavid du Colombier rcerror("char class too large; increase Reclass.spans size");
443*3468a491SDavid du Colombier return 0;
444*3468a491SDavid du Colombier }
4453e12c5d1SDavid du Colombier
4463e12c5d1SDavid du Colombier /* sort on span start */
4473e12c5d1SDavid du Colombier for(p = r; p < ep; p += 2){
4483e12c5d1SDavid du Colombier for(np = p; np < ep; np += 2)
4493e12c5d1SDavid du Colombier if(*np < *p){
4503e12c5d1SDavid du Colombier rune = np[0];
4513e12c5d1SDavid du Colombier np[0] = p[0];
4523e12c5d1SDavid du Colombier p[0] = rune;
4533e12c5d1SDavid du Colombier rune = np[1];
4543e12c5d1SDavid du Colombier np[1] = p[1];
4553e12c5d1SDavid du Colombier p[1] = rune;
4563e12c5d1SDavid du Colombier }
4573e12c5d1SDavid du Colombier }
4583e12c5d1SDavid du Colombier
4593e12c5d1SDavid du Colombier /* merge spans */
4603e12c5d1SDavid du Colombier np = yyclassp->spans;
4613e12c5d1SDavid du Colombier p = r;
4623e12c5d1SDavid du Colombier if(r == ep)
4633e12c5d1SDavid du Colombier yyclassp->end = np;
4643e12c5d1SDavid du Colombier else {
4653e12c5d1SDavid du Colombier np[0] = *p++;
4663e12c5d1SDavid du Colombier np[1] = *p++;
4673e12c5d1SDavid du Colombier for(; p < ep; p += 2)
468*3468a491SDavid du Colombier /* overlapping or adjacent ranges? */
469*3468a491SDavid du Colombier if(p[0] <= np[1] + 1){
470*3468a491SDavid du Colombier if(p[1] >= np[1])
471*3468a491SDavid du Colombier np[1] = p[1]; /* coalesce */
4723e12c5d1SDavid du Colombier } else {
4733e12c5d1SDavid du Colombier np += 2;
4743e12c5d1SDavid du Colombier np[0] = p[0];
4753e12c5d1SDavid du Colombier np[1] = p[1];
4763e12c5d1SDavid du Colombier }
4773e12c5d1SDavid du Colombier yyclassp->end = np+2;
4783e12c5d1SDavid du Colombier }
4793e12c5d1SDavid du Colombier
4803e12c5d1SDavid du Colombier return type;
4813e12c5d1SDavid du Colombier }
4823e12c5d1SDavid du Colombier
4833e12c5d1SDavid du Colombier static Reprog*
regcomp1(char * s,int literal,int dot_type)4843e12c5d1SDavid du Colombier regcomp1(char *s, int literal, int dot_type)
4853e12c5d1SDavid du Colombier {
4863e12c5d1SDavid du Colombier int token;
4873e12c5d1SDavid du Colombier Reprog *pp;
4883e12c5d1SDavid du Colombier
4893e12c5d1SDavid du Colombier /* get memory for the program */
4903e12c5d1SDavid du Colombier pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
4913e12c5d1SDavid du Colombier if(pp == 0){
4923e12c5d1SDavid du Colombier regerror("out of memory");
4933e12c5d1SDavid du Colombier return 0;
4943e12c5d1SDavid du Colombier }
4953e12c5d1SDavid du Colombier freep = pp->firstinst;
4963e12c5d1SDavid du Colombier classp = pp->class;
4973e12c5d1SDavid du Colombier errors = 0;
4983e12c5d1SDavid du Colombier
4993e12c5d1SDavid du Colombier if(setjmp(regkaboom))
5003e12c5d1SDavid du Colombier goto out;
5013e12c5d1SDavid du Colombier
5023e12c5d1SDavid du Colombier /* go compile the sucker */
5033e12c5d1SDavid du Colombier lexdone = 0;
5043e12c5d1SDavid du Colombier exprp = s;
5053e12c5d1SDavid du Colombier nclass = 0;
5063e12c5d1SDavid du Colombier nbra = 0;
5073e12c5d1SDavid du Colombier atorp = atorstack;
5083e12c5d1SDavid du Colombier andp = andstack;
5093e12c5d1SDavid du Colombier subidp = subidstack;
5103e12c5d1SDavid du Colombier lastwasand = FALSE;
5113e12c5d1SDavid du Colombier cursubid = 0;
5123e12c5d1SDavid du Colombier
5133e12c5d1SDavid du Colombier /* Start with a low priority operator to prime parser */
5143e12c5d1SDavid du Colombier pushator(START-1);
5153e12c5d1SDavid du Colombier while((token = lex(literal, dot_type)) != END){
5163e12c5d1SDavid du Colombier if((token&0300) == OPERATOR)
5173e12c5d1SDavid du Colombier operator(token);
5183e12c5d1SDavid du Colombier else
5193e12c5d1SDavid du Colombier operand(token);
5203e12c5d1SDavid du Colombier }
5213e12c5d1SDavid du Colombier
5223e12c5d1SDavid du Colombier /* Close with a low priority operator */
5233e12c5d1SDavid du Colombier evaluntil(START);
5243e12c5d1SDavid du Colombier
5253e12c5d1SDavid du Colombier /* Force END */
5263e12c5d1SDavid du Colombier operand(END);
5273e12c5d1SDavid du Colombier evaluntil(START);
5283e12c5d1SDavid du Colombier #ifdef DEBUG
5293e12c5d1SDavid du Colombier dumpstack();
5303e12c5d1SDavid du Colombier #endif
5313e12c5d1SDavid du Colombier if(nbra)
5323e12c5d1SDavid du Colombier rcerror("unmatched left paren");
5333e12c5d1SDavid du Colombier --andp; /* points to first and only operand */
5343e12c5d1SDavid du Colombier pp->startinst = andp->first;
5353e12c5d1SDavid du Colombier #ifdef DEBUG
5363e12c5d1SDavid du Colombier dump(pp);
5373e12c5d1SDavid du Colombier #endif
5383e12c5d1SDavid du Colombier pp = optimize(pp);
5393e12c5d1SDavid du Colombier #ifdef DEBUG
5403e12c5d1SDavid du Colombier print("start: %d\n", andp->first-pp->firstinst);
5413e12c5d1SDavid du Colombier dump(pp);
5423e12c5d1SDavid du Colombier #endif
5433e12c5d1SDavid du Colombier out:
5443e12c5d1SDavid du Colombier if(errors){
5453e12c5d1SDavid du Colombier free(pp);
5463e12c5d1SDavid du Colombier pp = 0;
5473e12c5d1SDavid du Colombier }
5483e12c5d1SDavid du Colombier return pp;
5493e12c5d1SDavid du Colombier }
5503e12c5d1SDavid du Colombier
5513e12c5d1SDavid du Colombier extern Reprog*
regcomp(char * s)5523e12c5d1SDavid du Colombier regcomp(char *s)
5533e12c5d1SDavid du Colombier {
5543e12c5d1SDavid du Colombier return regcomp1(s, 0, ANY);
5553e12c5d1SDavid du Colombier }
5563e12c5d1SDavid du Colombier
5573e12c5d1SDavid du Colombier extern Reprog*
regcomplit(char * s)5583e12c5d1SDavid du Colombier regcomplit(char *s)
5593e12c5d1SDavid du Colombier {
5603e12c5d1SDavid du Colombier return regcomp1(s, 1, ANY);
5613e12c5d1SDavid du Colombier }
5623e12c5d1SDavid du Colombier
5633e12c5d1SDavid du Colombier extern Reprog*
regcompnl(char * s)5643e12c5d1SDavid du Colombier regcompnl(char *s)
5653e12c5d1SDavid du Colombier {
5663e12c5d1SDavid du Colombier return regcomp1(s, 0, ANYNL);
5673e12c5d1SDavid du Colombier }
568