1*4246b616SDavid du Colombier /* From libregexp but leaks extra classes when it runs out */
2*4246b616SDavid du Colombier
3*4246b616SDavid du Colombier
4*4246b616SDavid du Colombier #include <u.h>
5*4246b616SDavid du Colombier #include <libc.h>
6*4246b616SDavid du Colombier #include "regexp.h"
7*4246b616SDavid du Colombier #include "/sys/src/libregexp/regcomp.h"
8*4246b616SDavid du Colombier
9*4246b616SDavid du Colombier #define TRUE 1
10*4246b616SDavid du Colombier #define FALSE 0
11*4246b616SDavid du Colombier
12*4246b616SDavid du Colombier /*
13*4246b616SDavid du Colombier * Parser Information
14*4246b616SDavid du Colombier */
15*4246b616SDavid du Colombier typedef
16*4246b616SDavid du Colombier struct Node
17*4246b616SDavid du Colombier {
18*4246b616SDavid du Colombier Reinst* first;
19*4246b616SDavid du Colombier Reinst* last;
20*4246b616SDavid du Colombier }Node;
21*4246b616SDavid du Colombier
22*4246b616SDavid du Colombier #define NSTACK 20
23*4246b616SDavid du Colombier static Node andstack[NSTACK];
24*4246b616SDavid du Colombier static Node *andp;
25*4246b616SDavid du Colombier static int atorstack[NSTACK];
26*4246b616SDavid du Colombier static int* atorp;
27*4246b616SDavid du Colombier static int cursubid; /* id of current subexpression */
28*4246b616SDavid du Colombier static int subidstack[NSTACK]; /* parallel to atorstack */
29*4246b616SDavid du Colombier static int* subidp;
30*4246b616SDavid du Colombier static int lastwasand; /* Last token was operand */
31*4246b616SDavid du Colombier static int nbra;
32*4246b616SDavid du Colombier static char* exprp; /* pointer to next character in source expression */
33*4246b616SDavid du Colombier static int lexdone;
34*4246b616SDavid du Colombier static int nclass;
35*4246b616SDavid du Colombier static Reclass*classp;
36*4246b616SDavid du Colombier static Reinst* freep;
37*4246b616SDavid du Colombier static int errors;
38*4246b616SDavid du Colombier static Rune yyrune; /* last lex'd rune */
39*4246b616SDavid du Colombier static Reclass*yyclassp; /* last lex'd class */
40*4246b616SDavid du Colombier
41*4246b616SDavid du Colombier /* predeclared crap */
42*4246b616SDavid du Colombier static void operator(int);
43*4246b616SDavid du Colombier static void pushand(Reinst*, Reinst*);
44*4246b616SDavid du Colombier static void pushator(int);
45*4246b616SDavid du Colombier static void evaluntil(int);
46*4246b616SDavid du Colombier static int bldcclass(void);
47*4246b616SDavid du Colombier
48*4246b616SDavid du Colombier static jmp_buf regkaboom;
49*4246b616SDavid du Colombier
50*4246b616SDavid du Colombier static void
rcerror(char * s)51*4246b616SDavid du Colombier rcerror(char *s)
52*4246b616SDavid du Colombier {
53*4246b616SDavid du Colombier errors++;
54*4246b616SDavid du Colombier regerror(s);
55*4246b616SDavid du Colombier longjmp(regkaboom, 1);
56*4246b616SDavid du Colombier }
57*4246b616SDavid du Colombier
58*4246b616SDavid du Colombier static Reinst*
newinst(int t)59*4246b616SDavid du Colombier newinst(int t)
60*4246b616SDavid du Colombier {
61*4246b616SDavid du Colombier freep->type = t;
62*4246b616SDavid du Colombier freep->left = 0;
63*4246b616SDavid du Colombier freep->right = 0;
64*4246b616SDavid du Colombier return freep++;
65*4246b616SDavid du Colombier }
66*4246b616SDavid du Colombier
67*4246b616SDavid du Colombier static void
operand(int t)68*4246b616SDavid du Colombier operand(int t)
69*4246b616SDavid du Colombier {
70*4246b616SDavid du Colombier Reinst *i;
71*4246b616SDavid du Colombier
72*4246b616SDavid du Colombier if(lastwasand)
73*4246b616SDavid du Colombier operator(CAT); /* catenate is implicit */
74*4246b616SDavid du Colombier i = newinst(t);
75*4246b616SDavid du Colombier
76*4246b616SDavid du Colombier if(t == CCLASS || t == NCCLASS)
77*4246b616SDavid du Colombier i->cp = yyclassp;
78*4246b616SDavid du Colombier if(t == RUNE)
79*4246b616SDavid du Colombier i->r = yyrune;
80*4246b616SDavid du Colombier
81*4246b616SDavid du Colombier pushand(i, i);
82*4246b616SDavid du Colombier lastwasand = TRUE;
83*4246b616SDavid du Colombier }
84*4246b616SDavid du Colombier
85*4246b616SDavid du Colombier static void
operator(int t)86*4246b616SDavid du Colombier operator(int t)
87*4246b616SDavid du Colombier {
88*4246b616SDavid du Colombier if(t==RBRA && --nbra<0)
89*4246b616SDavid du Colombier rcerror("unmatched right paren");
90*4246b616SDavid du Colombier if(t==LBRA){
91*4246b616SDavid du Colombier if(++cursubid >= NSUBEXP)
92*4246b616SDavid du Colombier rcerror ("too many subexpressions");
93*4246b616SDavid du Colombier nbra++;
94*4246b616SDavid du Colombier if(lastwasand)
95*4246b616SDavid du Colombier operator(CAT);
96*4246b616SDavid du Colombier } else
97*4246b616SDavid du Colombier evaluntil(t);
98*4246b616SDavid du Colombier if(t != RBRA)
99*4246b616SDavid du Colombier pushator(t);
100*4246b616SDavid du Colombier lastwasand = FALSE;
101*4246b616SDavid du Colombier if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
102*4246b616SDavid du Colombier lastwasand = TRUE; /* these look like operands */
103*4246b616SDavid du Colombier }
104*4246b616SDavid du Colombier
105*4246b616SDavid du Colombier static void
regerr2(char * s,int c)106*4246b616SDavid du Colombier regerr2(char *s, int c)
107*4246b616SDavid du Colombier {
108*4246b616SDavid du Colombier char buf[100];
109*4246b616SDavid du Colombier char *cp = buf;
110*4246b616SDavid du Colombier while(*s)
111*4246b616SDavid du Colombier *cp++ = *s++;
112*4246b616SDavid du Colombier *cp++ = c;
113*4246b616SDavid du Colombier *cp = '\0';
114*4246b616SDavid du Colombier rcerror(buf);
115*4246b616SDavid du Colombier }
116*4246b616SDavid du Colombier
117*4246b616SDavid du Colombier static void
cant(char * s)118*4246b616SDavid du Colombier cant(char *s)
119*4246b616SDavid du Colombier {
120*4246b616SDavid du Colombier char buf[100];
121*4246b616SDavid du Colombier strcpy(buf, "can't happen: ");
122*4246b616SDavid du Colombier strcat(buf, s);
123*4246b616SDavid du Colombier rcerror(buf);
124*4246b616SDavid du Colombier }
125*4246b616SDavid du Colombier
126*4246b616SDavid du Colombier static void
pushand(Reinst * f,Reinst * l)127*4246b616SDavid du Colombier pushand(Reinst *f, Reinst *l)
128*4246b616SDavid du Colombier {
129*4246b616SDavid du Colombier if(andp >= &andstack[NSTACK])
130*4246b616SDavid du Colombier cant("operand stack overflow");
131*4246b616SDavid du Colombier andp->first = f;
132*4246b616SDavid du Colombier andp->last = l;
133*4246b616SDavid du Colombier andp++;
134*4246b616SDavid du Colombier }
135*4246b616SDavid du Colombier
136*4246b616SDavid du Colombier static void
pushator(int t)137*4246b616SDavid du Colombier pushator(int t)
138*4246b616SDavid du Colombier {
139*4246b616SDavid du Colombier if(atorp >= &atorstack[NSTACK])
140*4246b616SDavid du Colombier cant("operator stack overflow");
141*4246b616SDavid du Colombier *atorp++ = t;
142*4246b616SDavid du Colombier *subidp++ = cursubid;
143*4246b616SDavid du Colombier }
144*4246b616SDavid du Colombier
145*4246b616SDavid du Colombier static Node*
popand(int op)146*4246b616SDavid du Colombier popand(int op)
147*4246b616SDavid du Colombier {
148*4246b616SDavid du Colombier Reinst *inst;
149*4246b616SDavid du Colombier
150*4246b616SDavid du Colombier if(andp <= &andstack[0]){
151*4246b616SDavid du Colombier regerr2("missing operand for ", op);
152*4246b616SDavid du Colombier inst = newinst(NOP);
153*4246b616SDavid du Colombier pushand(inst,inst);
154*4246b616SDavid du Colombier }
155*4246b616SDavid du Colombier return --andp;
156*4246b616SDavid du Colombier }
157*4246b616SDavid du Colombier
158*4246b616SDavid du Colombier static int
popator(void)159*4246b616SDavid du Colombier popator(void)
160*4246b616SDavid du Colombier {
161*4246b616SDavid du Colombier if(atorp <= &atorstack[0])
162*4246b616SDavid du Colombier cant("operator stack underflow");
163*4246b616SDavid du Colombier --subidp;
164*4246b616SDavid du Colombier return *--atorp;
165*4246b616SDavid du Colombier }
166*4246b616SDavid du Colombier
167*4246b616SDavid du Colombier static void
evaluntil(int pri)168*4246b616SDavid du Colombier evaluntil(int pri)
169*4246b616SDavid du Colombier {
170*4246b616SDavid du Colombier Node *op1, *op2;
171*4246b616SDavid du Colombier Reinst *inst1, *inst2;
172*4246b616SDavid du Colombier
173*4246b616SDavid du Colombier while(pri==RBRA || atorp[-1]>=pri){
174*4246b616SDavid du Colombier switch(popator()){
175*4246b616SDavid du Colombier default:
176*4246b616SDavid du Colombier rcerror("unknown operator in evaluntil");
177*4246b616SDavid du Colombier break;
178*4246b616SDavid du Colombier case LBRA: /* must have been RBRA */
179*4246b616SDavid du Colombier op1 = popand('(');
180*4246b616SDavid du Colombier inst2 = newinst(RBRA);
181*4246b616SDavid du Colombier inst2->subid = *subidp;
182*4246b616SDavid du Colombier op1->last->next = inst2;
183*4246b616SDavid du Colombier inst1 = newinst(LBRA);
184*4246b616SDavid du Colombier inst1->subid = *subidp;
185*4246b616SDavid du Colombier inst1->next = op1->first;
186*4246b616SDavid du Colombier pushand(inst1, inst2);
187*4246b616SDavid du Colombier return;
188*4246b616SDavid du Colombier case OR:
189*4246b616SDavid du Colombier op2 = popand('|');
190*4246b616SDavid du Colombier op1 = popand('|');
191*4246b616SDavid du Colombier inst2 = newinst(NOP);
192*4246b616SDavid du Colombier op2->last->next = inst2;
193*4246b616SDavid du Colombier op1->last->next = inst2;
194*4246b616SDavid du Colombier inst1 = newinst(OR);
195*4246b616SDavid du Colombier inst1->right = op1->first;
196*4246b616SDavid du Colombier inst1->left = op2->first;
197*4246b616SDavid du Colombier pushand(inst1, inst2);
198*4246b616SDavid du Colombier break;
199*4246b616SDavid du Colombier case CAT:
200*4246b616SDavid du Colombier op2 = popand(0);
201*4246b616SDavid du Colombier op1 = popand(0);
202*4246b616SDavid du Colombier op1->last->next = op2->first;
203*4246b616SDavid du Colombier pushand(op1->first, op2->last);
204*4246b616SDavid du Colombier break;
205*4246b616SDavid du Colombier case STAR:
206*4246b616SDavid du Colombier op2 = popand('*');
207*4246b616SDavid du Colombier inst1 = newinst(OR);
208*4246b616SDavid du Colombier op2->last->next = inst1;
209*4246b616SDavid du Colombier inst1->right = op2->first;
210*4246b616SDavid du Colombier pushand(inst1, inst1);
211*4246b616SDavid du Colombier break;
212*4246b616SDavid du Colombier case PLUS:
213*4246b616SDavid du Colombier op2 = popand('+');
214*4246b616SDavid du Colombier inst1 = newinst(OR);
215*4246b616SDavid du Colombier op2->last->next = inst1;
216*4246b616SDavid du Colombier inst1->right = op2->first;
217*4246b616SDavid du Colombier pushand(op2->first, inst1);
218*4246b616SDavid du Colombier break;
219*4246b616SDavid du Colombier case QUEST:
220*4246b616SDavid du Colombier op2 = popand('?');
221*4246b616SDavid du Colombier inst1 = newinst(OR);
222*4246b616SDavid du Colombier inst2 = newinst(NOP);
223*4246b616SDavid du Colombier inst1->left = inst2;
224*4246b616SDavid du Colombier inst1->right = op2->first;
225*4246b616SDavid du Colombier op2->last->next = inst2;
226*4246b616SDavid du Colombier pushand(inst1, inst2);
227*4246b616SDavid du Colombier break;
228*4246b616SDavid du Colombier }
229*4246b616SDavid du Colombier }
230*4246b616SDavid du Colombier }
231*4246b616SDavid du Colombier
232*4246b616SDavid du Colombier static Reprog*
optimize(Reprog * pp)233*4246b616SDavid du Colombier optimize(Reprog *pp)
234*4246b616SDavid du Colombier {
235*4246b616SDavid du Colombier Reinst *inst, *target;
236*4246b616SDavid du Colombier int size;
237*4246b616SDavid du Colombier Reprog *npp;
238*4246b616SDavid du Colombier Reclass *cl;
239*4246b616SDavid du Colombier int diff;
240*4246b616SDavid du Colombier
241*4246b616SDavid du Colombier /*
242*4246b616SDavid du Colombier * get rid of NOOP chains
243*4246b616SDavid du Colombier */
244*4246b616SDavid du Colombier for(inst=pp->firstinst; inst->type!=END; inst++){
245*4246b616SDavid du Colombier target = inst->next;
246*4246b616SDavid du Colombier while(target->type == NOP)
247*4246b616SDavid du Colombier target = target->next;
248*4246b616SDavid du Colombier inst->next = target;
249*4246b616SDavid du Colombier }
250*4246b616SDavid du Colombier
251*4246b616SDavid du Colombier /*
252*4246b616SDavid du Colombier * The original allocation is for an area larger than
253*4246b616SDavid du Colombier * necessary. Reallocate to the actual space used
254*4246b616SDavid du Colombier * and then relocate the code.
255*4246b616SDavid du Colombier */
256*4246b616SDavid du Colombier size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
257*4246b616SDavid du Colombier npp = realloc(pp, size);
258*4246b616SDavid du Colombier if(npp==0 || npp==pp)
259*4246b616SDavid du Colombier return pp;
260*4246b616SDavid du Colombier diff = (char *)npp - (char *)pp;
261*4246b616SDavid du Colombier freep = (Reinst *)((char *)freep + diff);
262*4246b616SDavid du Colombier for(inst=npp->firstinst; inst<freep; inst++){
263*4246b616SDavid du Colombier switch(inst->type){
264*4246b616SDavid du Colombier case OR:
265*4246b616SDavid du Colombier case STAR:
266*4246b616SDavid du Colombier case PLUS:
267*4246b616SDavid du Colombier case QUEST:
268*4246b616SDavid du Colombier *(char **)&inst->right += diff;
269*4246b616SDavid du Colombier break;
270*4246b616SDavid du Colombier case CCLASS:
271*4246b616SDavid du Colombier case NCCLASS:
272*4246b616SDavid du Colombier *(char **)&inst->right += diff;
273*4246b616SDavid du Colombier cl = inst->cp;
274*4246b616SDavid du Colombier *(char **)&cl->end += diff;
275*4246b616SDavid du Colombier break;
276*4246b616SDavid du Colombier }
277*4246b616SDavid du Colombier *(char **)&inst->left += diff;
278*4246b616SDavid du Colombier }
279*4246b616SDavid du Colombier *(char **)&npp->startinst += diff;
280*4246b616SDavid du Colombier return npp;
281*4246b616SDavid du Colombier }
282*4246b616SDavid du Colombier
283*4246b616SDavid du Colombier #ifdef DEBUG
284*4246b616SDavid du Colombier static void
dumpstack(void)285*4246b616SDavid du Colombier dumpstack(void){
286*4246b616SDavid du Colombier Node *stk;
287*4246b616SDavid du Colombier int *ip;
288*4246b616SDavid du Colombier
289*4246b616SDavid du Colombier print("operators\n");
290*4246b616SDavid du Colombier for(ip=atorstack; ip<atorp; ip++)
291*4246b616SDavid du Colombier print("0%o\n", *ip);
292*4246b616SDavid du Colombier print("operands\n");
293*4246b616SDavid du Colombier for(stk=andstack; stk<andp; stk++)
294*4246b616SDavid du Colombier print("0%o\t0%o\n", stk->first->type, stk->last->type);
295*4246b616SDavid du Colombier }
296*4246b616SDavid du Colombier
297*4246b616SDavid du Colombier static void
dump(Reprog * pp)298*4246b616SDavid du Colombier dump(Reprog *pp)
299*4246b616SDavid du Colombier {
300*4246b616SDavid du Colombier Reinst *l;
301*4246b616SDavid du Colombier Rune *p;
302*4246b616SDavid du Colombier
303*4246b616SDavid du Colombier l = pp->firstinst;
304*4246b616SDavid du Colombier do{
305*4246b616SDavid du Colombier print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
306*4246b616SDavid du Colombier l->left-pp->firstinst, l->right-pp->firstinst);
307*4246b616SDavid du Colombier if(l->type == RUNE)
308*4246b616SDavid du Colombier print("\t%C\n", l->r);
309*4246b616SDavid du Colombier else if(l->type == CCLASS || l->type == NCCLASS){
310*4246b616SDavid du Colombier print("\t[");
311*4246b616SDavid du Colombier if(l->type == NCCLASS)
312*4246b616SDavid du Colombier print("^");
313*4246b616SDavid du Colombier for(p = l->cp->spans; p < l->cp->end; p += 2)
314*4246b616SDavid du Colombier if(p[0] == p[1])
315*4246b616SDavid du Colombier print("%C", p[0]);
316*4246b616SDavid du Colombier else
317*4246b616SDavid du Colombier print("%C-%C", p[0], p[1]);
318*4246b616SDavid du Colombier print("]\n");
319*4246b616SDavid du Colombier } else
320*4246b616SDavid du Colombier print("\n");
321*4246b616SDavid du Colombier }while(l++->type);
322*4246b616SDavid du Colombier }
323*4246b616SDavid du Colombier #endif
324*4246b616SDavid du Colombier
325*4246b616SDavid du Colombier static Reclass*
newclass(void)326*4246b616SDavid du Colombier newclass(void)
327*4246b616SDavid du Colombier {
328*4246b616SDavid du Colombier if(nclass <= 0){
329*4246b616SDavid du Colombier classp = mallocz(128*sizeof(Reclass), 1);
330*4246b616SDavid du Colombier if(classp == nil)
331*4246b616SDavid du Colombier regerror("out of memory");
332*4246b616SDavid du Colombier nclass = 128;
333*4246b616SDavid du Colombier }
334*4246b616SDavid du Colombier return &classp[--nclass];
335*4246b616SDavid du Colombier }
336*4246b616SDavid du Colombier
337*4246b616SDavid du Colombier static int
nextc(Rune * rp)338*4246b616SDavid du Colombier nextc(Rune *rp)
339*4246b616SDavid du Colombier {
340*4246b616SDavid du Colombier if(lexdone){
341*4246b616SDavid du Colombier *rp = 0;
342*4246b616SDavid du Colombier return 1;
343*4246b616SDavid du Colombier }
344*4246b616SDavid du Colombier exprp += chartorune(rp, exprp);
345*4246b616SDavid du Colombier if(*rp == L'\\'){
346*4246b616SDavid du Colombier exprp += chartorune(rp, exprp);
347*4246b616SDavid du Colombier return 1;
348*4246b616SDavid du Colombier }
349*4246b616SDavid du Colombier if(*rp == 0)
350*4246b616SDavid du Colombier lexdone = 1;
351*4246b616SDavid du Colombier return 0;
352*4246b616SDavid du Colombier }
353*4246b616SDavid du Colombier
354*4246b616SDavid du Colombier static int
lex(int literal,int dot_type)355*4246b616SDavid du Colombier lex(int literal, int dot_type)
356*4246b616SDavid du Colombier {
357*4246b616SDavid du Colombier int quoted;
358*4246b616SDavid du Colombier
359*4246b616SDavid du Colombier quoted = nextc(&yyrune);
360*4246b616SDavid du Colombier if(literal || quoted){
361*4246b616SDavid du Colombier if(yyrune == 0)
362*4246b616SDavid du Colombier return END;
363*4246b616SDavid du Colombier return RUNE;
364*4246b616SDavid du Colombier }
365*4246b616SDavid du Colombier
366*4246b616SDavid du Colombier switch(yyrune){
367*4246b616SDavid du Colombier case 0:
368*4246b616SDavid du Colombier return END;
369*4246b616SDavid du Colombier case L'*':
370*4246b616SDavid du Colombier return STAR;
371*4246b616SDavid du Colombier case L'?':
372*4246b616SDavid du Colombier return QUEST;
373*4246b616SDavid du Colombier case L'+':
374*4246b616SDavid du Colombier return PLUS;
375*4246b616SDavid du Colombier case L'|':
376*4246b616SDavid du Colombier return OR;
377*4246b616SDavid du Colombier case L'.':
378*4246b616SDavid du Colombier return dot_type;
379*4246b616SDavid du Colombier case L'(':
380*4246b616SDavid du Colombier return LBRA;
381*4246b616SDavid du Colombier case L')':
382*4246b616SDavid du Colombier return RBRA;
383*4246b616SDavid du Colombier case L'^':
384*4246b616SDavid du Colombier return BOL;
385*4246b616SDavid du Colombier case L'$':
386*4246b616SDavid du Colombier return EOL;
387*4246b616SDavid du Colombier case L'[':
388*4246b616SDavid du Colombier return bldcclass();
389*4246b616SDavid du Colombier }
390*4246b616SDavid du Colombier return RUNE;
391*4246b616SDavid du Colombier }
392*4246b616SDavid du Colombier
393*4246b616SDavid du Colombier static int
bldcclass(void)394*4246b616SDavid du Colombier bldcclass(void)
395*4246b616SDavid du Colombier {
396*4246b616SDavid du Colombier int type;
397*4246b616SDavid du Colombier Rune r[NCCRUNE];
398*4246b616SDavid du Colombier Rune *p, *ep, *np;
399*4246b616SDavid du Colombier Rune rune;
400*4246b616SDavid du Colombier int quoted;
401*4246b616SDavid du Colombier
402*4246b616SDavid du Colombier /* we have already seen the '[' */
403*4246b616SDavid du Colombier type = CCLASS;
404*4246b616SDavid du Colombier yyclassp = newclass();
405*4246b616SDavid du Colombier
406*4246b616SDavid du Colombier /* look ahead for negation */
407*4246b616SDavid du Colombier /* SPECIAL CASE!!! negated classes don't match \n */
408*4246b616SDavid du Colombier ep = r;
409*4246b616SDavid du Colombier quoted = nextc(&rune);
410*4246b616SDavid du Colombier if(!quoted && rune == L'^'){
411*4246b616SDavid du Colombier type = NCCLASS;
412*4246b616SDavid du Colombier quoted = nextc(&rune);
413*4246b616SDavid du Colombier *ep++ = L'\n';
414*4246b616SDavid du Colombier *ep++ = L'\n';
415*4246b616SDavid du Colombier }
416*4246b616SDavid du Colombier
417*4246b616SDavid du Colombier /* parse class into a set of spans */
418*4246b616SDavid du Colombier for(; ep<&r[NCCRUNE];){
419*4246b616SDavid du Colombier if(rune == 0){
420*4246b616SDavid du Colombier rcerror("malformed '[]'");
421*4246b616SDavid du Colombier return 0;
422*4246b616SDavid du Colombier }
423*4246b616SDavid du Colombier if(!quoted && rune == L']')
424*4246b616SDavid du Colombier break;
425*4246b616SDavid du Colombier if(!quoted && rune == L'-'){
426*4246b616SDavid du Colombier if(ep == r){
427*4246b616SDavid du Colombier rcerror("malformed '[]'");
428*4246b616SDavid du Colombier return 0;
429*4246b616SDavid du Colombier }
430*4246b616SDavid du Colombier quoted = nextc(&rune);
431*4246b616SDavid du Colombier if((!quoted && rune == L']') || rune == 0){
432*4246b616SDavid du Colombier rcerror("malformed '[]'");
433*4246b616SDavid du Colombier return 0;
434*4246b616SDavid du Colombier }
435*4246b616SDavid du Colombier *(ep-1) = rune;
436*4246b616SDavid du Colombier } else {
437*4246b616SDavid du Colombier *ep++ = rune;
438*4246b616SDavid du Colombier *ep++ = rune;
439*4246b616SDavid du Colombier }
440*4246b616SDavid du Colombier quoted = nextc(&rune);
441*4246b616SDavid du Colombier }
442*4246b616SDavid du Colombier
443*4246b616SDavid du Colombier /* sort on span start */
444*4246b616SDavid du Colombier for(p = r; p < ep; p += 2){
445*4246b616SDavid du Colombier for(np = p; np < ep; np += 2)
446*4246b616SDavid du Colombier if(*np < *p){
447*4246b616SDavid du Colombier rune = np[0];
448*4246b616SDavid du Colombier np[0] = p[0];
449*4246b616SDavid du Colombier p[0] = rune;
450*4246b616SDavid du Colombier rune = np[1];
451*4246b616SDavid du Colombier np[1] = p[1];
452*4246b616SDavid du Colombier p[1] = rune;
453*4246b616SDavid du Colombier }
454*4246b616SDavid du Colombier }
455*4246b616SDavid du Colombier
456*4246b616SDavid du Colombier /* merge spans */
457*4246b616SDavid du Colombier np = yyclassp->spans;
458*4246b616SDavid du Colombier p = r;
459*4246b616SDavid du Colombier if(r == ep)
460*4246b616SDavid du Colombier yyclassp->end = np;
461*4246b616SDavid du Colombier else {
462*4246b616SDavid du Colombier np[0] = *p++;
463*4246b616SDavid du Colombier np[1] = *p++;
464*4246b616SDavid du Colombier for(; p < ep; p += 2)
465*4246b616SDavid du Colombier if(p[0] <= np[1]){
466*4246b616SDavid du Colombier if(p[1] > np[1])
467*4246b616SDavid du Colombier np[1] = p[1];
468*4246b616SDavid du Colombier } else {
469*4246b616SDavid du Colombier np += 2;
470*4246b616SDavid du Colombier np[0] = p[0];
471*4246b616SDavid du Colombier np[1] = p[1];
472*4246b616SDavid du Colombier }
473*4246b616SDavid du Colombier yyclassp->end = np+2;
474*4246b616SDavid du Colombier }
475*4246b616SDavid du Colombier
476*4246b616SDavid du Colombier return type;
477*4246b616SDavid du Colombier }
478*4246b616SDavid du Colombier
479*4246b616SDavid du Colombier static Reprog*
regcomp1(char * s,int literal,int dot_type)480*4246b616SDavid du Colombier regcomp1(char *s, int literal, int dot_type)
481*4246b616SDavid du Colombier {
482*4246b616SDavid du Colombier int token;
483*4246b616SDavid du Colombier Reprog *pp;
484*4246b616SDavid du Colombier
485*4246b616SDavid du Colombier /* get memory for the program */
486*4246b616SDavid du Colombier pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
487*4246b616SDavid du Colombier if(pp == 0){
488*4246b616SDavid du Colombier regerror("out of memory");
489*4246b616SDavid du Colombier return 0;
490*4246b616SDavid du Colombier }
491*4246b616SDavid du Colombier freep = pp->firstinst;
492*4246b616SDavid du Colombier classp = pp->class;
493*4246b616SDavid du Colombier errors = 0;
494*4246b616SDavid du Colombier
495*4246b616SDavid du Colombier if(setjmp(regkaboom))
496*4246b616SDavid du Colombier goto out;
497*4246b616SDavid du Colombier
498*4246b616SDavid du Colombier /* go compile the sucker */
499*4246b616SDavid du Colombier lexdone = 0;
500*4246b616SDavid du Colombier exprp = s;
501*4246b616SDavid du Colombier nclass = NCLASS;
502*4246b616SDavid du Colombier nbra = 0;
503*4246b616SDavid du Colombier atorp = atorstack;
504*4246b616SDavid du Colombier andp = andstack;
505*4246b616SDavid du Colombier subidp = subidstack;
506*4246b616SDavid du Colombier lastwasand = FALSE;
507*4246b616SDavid du Colombier cursubid = 0;
508*4246b616SDavid du Colombier
509*4246b616SDavid du Colombier /* Start with a low priority operator to prime parser */
510*4246b616SDavid du Colombier pushator(START-1);
511*4246b616SDavid du Colombier while((token = lex(literal, dot_type)) != END){
512*4246b616SDavid du Colombier if((token&0300) == OPERATOR)
513*4246b616SDavid du Colombier operator(token);
514*4246b616SDavid du Colombier else
515*4246b616SDavid du Colombier operand(token);
516*4246b616SDavid du Colombier }
517*4246b616SDavid du Colombier
518*4246b616SDavid du Colombier /* Close with a low priority operator */
519*4246b616SDavid du Colombier evaluntil(START);
520*4246b616SDavid du Colombier
521*4246b616SDavid du Colombier /* Force END */
522*4246b616SDavid du Colombier operand(END);
523*4246b616SDavid du Colombier evaluntil(START);
524*4246b616SDavid du Colombier #ifdef DEBUG
525*4246b616SDavid du Colombier dumpstack();
526*4246b616SDavid du Colombier #endif
527*4246b616SDavid du Colombier if(nbra)
528*4246b616SDavid du Colombier rcerror("unmatched left paren");
529*4246b616SDavid du Colombier --andp; /* points to first and only operand */
530*4246b616SDavid du Colombier pp->startinst = andp->first;
531*4246b616SDavid du Colombier #ifdef DEBUG
532*4246b616SDavid du Colombier dump(pp);
533*4246b616SDavid du Colombier #endif
534*4246b616SDavid du Colombier pp = optimize(pp);
535*4246b616SDavid du Colombier #ifdef DEBUG
536*4246b616SDavid du Colombier print("start: %d\n", andp->first-pp->firstinst);
537*4246b616SDavid du Colombier dump(pp);
538*4246b616SDavid du Colombier #endif
539*4246b616SDavid du Colombier out:
540*4246b616SDavid du Colombier if(errors){
541*4246b616SDavid du Colombier free(pp);
542*4246b616SDavid du Colombier pp = 0;
543*4246b616SDavid du Colombier }
544*4246b616SDavid du Colombier return pp;
545*4246b616SDavid du Colombier }
546*4246b616SDavid du Colombier
547*4246b616SDavid du Colombier extern Reprog*
regcomp(char * s)548*4246b616SDavid du Colombier regcomp(char *s)
549*4246b616SDavid du Colombier {
550*4246b616SDavid du Colombier return regcomp1(s, 0, ANY);
551*4246b616SDavid du Colombier }
552*4246b616SDavid du Colombier
553*4246b616SDavid du Colombier extern Reprog*
regcomplit(char * s)554*4246b616SDavid du Colombier regcomplit(char *s)
555*4246b616SDavid du Colombier {
556*4246b616SDavid du Colombier return regcomp1(s, 1, ANY);
557*4246b616SDavid du Colombier }
558*4246b616SDavid du Colombier
559*4246b616SDavid du Colombier extern Reprog*
regcompnl(char * s)560*4246b616SDavid du Colombier regcompnl(char *s)
561*4246b616SDavid du Colombier {
562*4246b616SDavid du Colombier return regcomp1(s, 0, ANYNL);
563*4246b616SDavid du Colombier }
564