xref: /inferno-os/utils/libregexp/rregexec.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a)
1*74a4d8c2SCharles.Forsyth #include <lib9.h>
2*74a4d8c2SCharles.Forsyth #include "regexp.h"
3*74a4d8c2SCharles.Forsyth #include "regcomp.h"
4*74a4d8c2SCharles.Forsyth 
5*74a4d8c2SCharles.Forsyth /*
6*74a4d8c2SCharles.Forsyth  *  return	0 if no match
7*74a4d8c2SCharles.Forsyth  *		>0 if a match
8*74a4d8c2SCharles.Forsyth  *		<0 if we ran out of _relist space
9*74a4d8c2SCharles.Forsyth  */
10*74a4d8c2SCharles.Forsyth static int
rregexec1(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)11*74a4d8c2SCharles.Forsyth rregexec1(Reprog *progp,	/* program to run */
12*74a4d8c2SCharles.Forsyth 	Rune *bol,		/* string to run machine on */
13*74a4d8c2SCharles.Forsyth 	Resub *mp,		/* subexpression elements */
14*74a4d8c2SCharles.Forsyth 	int ms,			/* number of elements at mp */
15*74a4d8c2SCharles.Forsyth 	Reljunk *j)
16*74a4d8c2SCharles.Forsyth {
17*74a4d8c2SCharles.Forsyth 	int flag=0;
18*74a4d8c2SCharles.Forsyth 	Reinst *inst;
19*74a4d8c2SCharles.Forsyth 	Relist *tlp;
20*74a4d8c2SCharles.Forsyth 	Rune *s;
21*74a4d8c2SCharles.Forsyth 	int i, checkstart;
22*74a4d8c2SCharles.Forsyth 	Rune r, *rp, *ep;
23*74a4d8c2SCharles.Forsyth 	Relist* tl;		/* This list, next list */
24*74a4d8c2SCharles.Forsyth 	Relist* nl;
25*74a4d8c2SCharles.Forsyth 	Relist* tle;		/* ends of this and next list */
26*74a4d8c2SCharles.Forsyth 	Relist* nle;
27*74a4d8c2SCharles.Forsyth 	int match;
28*74a4d8c2SCharles.Forsyth 
29*74a4d8c2SCharles.Forsyth 	match = 0;
30*74a4d8c2SCharles.Forsyth 	checkstart = j->startchar;
31*74a4d8c2SCharles.Forsyth 	if(mp)
32*74a4d8c2SCharles.Forsyth 		for(i=0; i<ms; i++) {
33*74a4d8c2SCharles.Forsyth 			mp[i].s.rsp = 0;
34*74a4d8c2SCharles.Forsyth 			mp[i].e.rep = 0;
35*74a4d8c2SCharles.Forsyth 		}
36*74a4d8c2SCharles.Forsyth 	j->relist[0][0].inst = 0;
37*74a4d8c2SCharles.Forsyth 	j->relist[1][0].inst = 0;
38*74a4d8c2SCharles.Forsyth 
39*74a4d8c2SCharles.Forsyth 	/* Execute machine once for each character, including terminal NUL */
40*74a4d8c2SCharles.Forsyth 	s = j->rstarts;
41*74a4d8c2SCharles.Forsyth 	do{
42*74a4d8c2SCharles.Forsyth 
43*74a4d8c2SCharles.Forsyth 		/* fast check for first char */
44*74a4d8c2SCharles.Forsyth 		if(checkstart) {
45*74a4d8c2SCharles.Forsyth 			switch(j->starttype) {
46*74a4d8c2SCharles.Forsyth 			case RUNE:
47*74a4d8c2SCharles.Forsyth 				while(*s != j->startchar) {
48*74a4d8c2SCharles.Forsyth 					if(*s == 0 || s == j->reol)
49*74a4d8c2SCharles.Forsyth 						return match;
50*74a4d8c2SCharles.Forsyth 					s++;
51*74a4d8c2SCharles.Forsyth 				}
52*74a4d8c2SCharles.Forsyth 				break;
53*74a4d8c2SCharles.Forsyth 			case BOL:
54*74a4d8c2SCharles.Forsyth 				if(s == bol)
55*74a4d8c2SCharles.Forsyth 					break;
56*74a4d8c2SCharles.Forsyth 				while(*s != '\n') {
57*74a4d8c2SCharles.Forsyth 					if(*s == 0 || s == j->reol)
58*74a4d8c2SCharles.Forsyth 						return match;
59*74a4d8c2SCharles.Forsyth 					s++;
60*74a4d8c2SCharles.Forsyth 				}
61*74a4d8c2SCharles.Forsyth 				break;
62*74a4d8c2SCharles.Forsyth 			}
63*74a4d8c2SCharles.Forsyth 		}
64*74a4d8c2SCharles.Forsyth 
65*74a4d8c2SCharles.Forsyth 		r = *s;
66*74a4d8c2SCharles.Forsyth 
67*74a4d8c2SCharles.Forsyth 		/* switch run lists */
68*74a4d8c2SCharles.Forsyth 		tl = j->relist[flag];
69*74a4d8c2SCharles.Forsyth 		tle = j->reliste[flag];
70*74a4d8c2SCharles.Forsyth 		nl = j->relist[flag^=1];
71*74a4d8c2SCharles.Forsyth 		nle = j->reliste[flag];
72*74a4d8c2SCharles.Forsyth 		nl->inst = 0;
73*74a4d8c2SCharles.Forsyth 
74*74a4d8c2SCharles.Forsyth 		/* Add first instruction to current list */
75*74a4d8c2SCharles.Forsyth 		_rrenewemptythread(tl, progp->startinst, s);
76*74a4d8c2SCharles.Forsyth 
77*74a4d8c2SCharles.Forsyth 		/* Execute machine until current list is empty */
78*74a4d8c2SCharles.Forsyth 		for(tlp=tl; tlp->inst; tlp++){
79*74a4d8c2SCharles.Forsyth 			for(inst=tlp->inst; ; inst = inst->u2.next){
80*74a4d8c2SCharles.Forsyth 				switch(inst->type){
81*74a4d8c2SCharles.Forsyth 				case RUNE:	/* regular character */
82*74a4d8c2SCharles.Forsyth 					if(inst->u1.r == r)
83*74a4d8c2SCharles.Forsyth 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
84*74a4d8c2SCharles.Forsyth 							return -1;
85*74a4d8c2SCharles.Forsyth 					break;
86*74a4d8c2SCharles.Forsyth 				case LBRA:
87*74a4d8c2SCharles.Forsyth 					tlp->se.m[inst->u1.subid].s.rsp = s;
88*74a4d8c2SCharles.Forsyth 					continue;
89*74a4d8c2SCharles.Forsyth 				case RBRA:
90*74a4d8c2SCharles.Forsyth 					tlp->se.m[inst->u1.subid].e.rep = s;
91*74a4d8c2SCharles.Forsyth 					continue;
92*74a4d8c2SCharles.Forsyth 				case ANY:
93*74a4d8c2SCharles.Forsyth 					if(r != '\n')
94*74a4d8c2SCharles.Forsyth 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
95*74a4d8c2SCharles.Forsyth 							return -1;
96*74a4d8c2SCharles.Forsyth 					break;
97*74a4d8c2SCharles.Forsyth 				case ANYNL:
98*74a4d8c2SCharles.Forsyth 					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
99*74a4d8c2SCharles.Forsyth 							return -1;
100*74a4d8c2SCharles.Forsyth 					break;
101*74a4d8c2SCharles.Forsyth 				case BOL:
102*74a4d8c2SCharles.Forsyth 					if(s == bol || *(s-1) == '\n')
103*74a4d8c2SCharles.Forsyth 						continue;
104*74a4d8c2SCharles.Forsyth 					break;
105*74a4d8c2SCharles.Forsyth 				case EOL:
106*74a4d8c2SCharles.Forsyth 					if(s == j->reol || r == 0 || r == '\n')
107*74a4d8c2SCharles.Forsyth 						continue;
108*74a4d8c2SCharles.Forsyth 					break;
109*74a4d8c2SCharles.Forsyth 				case CCLASS:
110*74a4d8c2SCharles.Forsyth 					ep = inst->u1.cp->end;
111*74a4d8c2SCharles.Forsyth 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
112*74a4d8c2SCharles.Forsyth 						if(r >= rp[0] && r <= rp[1]){
113*74a4d8c2SCharles.Forsyth 							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
114*74a4d8c2SCharles.Forsyth 								return -1;
115*74a4d8c2SCharles.Forsyth 							break;
116*74a4d8c2SCharles.Forsyth 						}
117*74a4d8c2SCharles.Forsyth 					break;
118*74a4d8c2SCharles.Forsyth 				case NCCLASS:
119*74a4d8c2SCharles.Forsyth 					ep = inst->u1.cp->end;
120*74a4d8c2SCharles.Forsyth 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
121*74a4d8c2SCharles.Forsyth 						if(r >= rp[0] && r <= rp[1])
122*74a4d8c2SCharles.Forsyth 							break;
123*74a4d8c2SCharles.Forsyth 					if(rp == ep)
124*74a4d8c2SCharles.Forsyth 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
125*74a4d8c2SCharles.Forsyth 							return -1;
126*74a4d8c2SCharles.Forsyth 					break;
127*74a4d8c2SCharles.Forsyth 				case OR:
128*74a4d8c2SCharles.Forsyth 					/* evaluate right choice later */
129*74a4d8c2SCharles.Forsyth 					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
130*74a4d8c2SCharles.Forsyth 						return -1;
131*74a4d8c2SCharles.Forsyth 					/* efficiency: advance and re-evaluate */
132*74a4d8c2SCharles.Forsyth 					continue;
133*74a4d8c2SCharles.Forsyth 				case END:	/* Match! */
134*74a4d8c2SCharles.Forsyth 					match = 1;
135*74a4d8c2SCharles.Forsyth 					tlp->se.m[0].e.rep = s;
136*74a4d8c2SCharles.Forsyth 					if(mp != 0)
137*74a4d8c2SCharles.Forsyth 						_renewmatch(mp, ms, &tlp->se);
138*74a4d8c2SCharles.Forsyth 					break;
139*74a4d8c2SCharles.Forsyth 				}
140*74a4d8c2SCharles.Forsyth 				break;
141*74a4d8c2SCharles.Forsyth 			}
142*74a4d8c2SCharles.Forsyth 		}
143*74a4d8c2SCharles.Forsyth 		if(s == j->reol)
144*74a4d8c2SCharles.Forsyth 			break;
145*74a4d8c2SCharles.Forsyth 		checkstart = j->startchar && nl->inst==0;
146*74a4d8c2SCharles.Forsyth 		s++;
147*74a4d8c2SCharles.Forsyth 	}while(r);
148*74a4d8c2SCharles.Forsyth 	return match;
149*74a4d8c2SCharles.Forsyth }
150*74a4d8c2SCharles.Forsyth 
151*74a4d8c2SCharles.Forsyth static int
rregexec2(Reprog * progp,Rune * bol,Resub * mp,int ms,Reljunk * j)152*74a4d8c2SCharles.Forsyth rregexec2(Reprog *progp,	/* program to run */
153*74a4d8c2SCharles.Forsyth 	Rune *bol,	/* string to run machine on */
154*74a4d8c2SCharles.Forsyth 	Resub *mp,	/* subexpression elements */
155*74a4d8c2SCharles.Forsyth 	int ms,		/* number of elements at mp */
156*74a4d8c2SCharles.Forsyth 	Reljunk *j
157*74a4d8c2SCharles.Forsyth )
158*74a4d8c2SCharles.Forsyth {
159*74a4d8c2SCharles.Forsyth 	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
160*74a4d8c2SCharles.Forsyth 
161*74a4d8c2SCharles.Forsyth 	/* mark space */
162*74a4d8c2SCharles.Forsyth 	j->relist[0] = relist0;
163*74a4d8c2SCharles.Forsyth 	j->relist[1] = relist1;
164*74a4d8c2SCharles.Forsyth 	j->reliste[0] = relist0 + nelem(relist0) - 2;
165*74a4d8c2SCharles.Forsyth 	j->reliste[1] = relist1 + nelem(relist1) - 2;
166*74a4d8c2SCharles.Forsyth 
167*74a4d8c2SCharles.Forsyth 	return rregexec1(progp, bol, mp, ms, j);
168*74a4d8c2SCharles.Forsyth }
169*74a4d8c2SCharles.Forsyth 
170*74a4d8c2SCharles.Forsyth extern int
rregexec(Reprog * progp,Rune * bol,Resub * mp,int ms)171*74a4d8c2SCharles.Forsyth rregexec(Reprog *progp,	/* program to run */
172*74a4d8c2SCharles.Forsyth 	Rune *bol,	/* string to run machine on */
173*74a4d8c2SCharles.Forsyth 	Resub *mp,	/* subexpression elements */
174*74a4d8c2SCharles.Forsyth 	int ms)		/* number of elements at mp */
175*74a4d8c2SCharles.Forsyth {
176*74a4d8c2SCharles.Forsyth 	Reljunk j;
177*74a4d8c2SCharles.Forsyth 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
178*74a4d8c2SCharles.Forsyth 	int rv;
179*74a4d8c2SCharles.Forsyth 
180*74a4d8c2SCharles.Forsyth 	/*
181*74a4d8c2SCharles.Forsyth  	 *  use user-specified starting/ending location if specified
182*74a4d8c2SCharles.Forsyth 	 */
183*74a4d8c2SCharles.Forsyth 	j.rstarts = bol;
184*74a4d8c2SCharles.Forsyth 	j.reol = 0;
185*74a4d8c2SCharles.Forsyth 	if(mp && ms>0){
186*74a4d8c2SCharles.Forsyth 		if(mp->s.sp)
187*74a4d8c2SCharles.Forsyth 			j.rstarts = mp->s.rsp;
188*74a4d8c2SCharles.Forsyth 		if(mp->e.ep)
189*74a4d8c2SCharles.Forsyth 			j.reol = mp->e.rep;
190*74a4d8c2SCharles.Forsyth 	}
191*74a4d8c2SCharles.Forsyth 	j.starttype = 0;
192*74a4d8c2SCharles.Forsyth 	j.startchar = 0;
193*74a4d8c2SCharles.Forsyth 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
194*74a4d8c2SCharles.Forsyth 		j.starttype = RUNE;
195*74a4d8c2SCharles.Forsyth 		j.startchar = progp->startinst->u1.r;
196*74a4d8c2SCharles.Forsyth 	}
197*74a4d8c2SCharles.Forsyth 	if(progp->startinst->type == BOL)
198*74a4d8c2SCharles.Forsyth 		j.starttype = BOL;
199*74a4d8c2SCharles.Forsyth 
200*74a4d8c2SCharles.Forsyth 	/* mark space */
201*74a4d8c2SCharles.Forsyth 	j.relist[0] = relist0;
202*74a4d8c2SCharles.Forsyth 	j.relist[1] = relist1;
203*74a4d8c2SCharles.Forsyth 	j.reliste[0] = relist0 + nelem(relist0) - 2;
204*74a4d8c2SCharles.Forsyth 	j.reliste[1] = relist1 + nelem(relist1) - 2;
205*74a4d8c2SCharles.Forsyth 
206*74a4d8c2SCharles.Forsyth 	rv = rregexec1(progp, bol, mp, ms, &j);
207*74a4d8c2SCharles.Forsyth 	if(rv >= 0)
208*74a4d8c2SCharles.Forsyth 		return rv;
209*74a4d8c2SCharles.Forsyth 	rv = rregexec2(progp, bol, mp, ms, &j);
210*74a4d8c2SCharles.Forsyth 	if(rv >= 0)
211*74a4d8c2SCharles.Forsyth 		return rv;
212*74a4d8c2SCharles.Forsyth 	return -1;
213*74a4d8c2SCharles.Forsyth }
214