xref: /inferno-os/utils/libregexp/regexec.c (revision c094a1409b780cc543c077e8469fdb28b4c90afb)
1 #include <lib9.h>
2 #include "regexp.h"
3 #include "regcomp.h"
4 
5 
6 /*
7  *  return	0 if no match
8  *		>0 if a match
9  *		<0 if we ran out of _relist space
10  */
11 static int
12 regexec1(Reprog *progp,	/* program to run */
13 	char *bol,	/* string to run machine on */
14 	Resub *mp,	/* subexpression elements */
15 	int ms,		/* number of elements at mp */
16 	Reljunk *j
17 )
18 {
19 	int flag=0;
20 	Reinst *inst;
21 	Relist *tlp;
22 	char *s;
23 	int i, checkstart;
24 	Rune r, *rp, *ep;
25 	int n;
26 	Relist* tl;		/* This list, next list */
27 	Relist* nl;
28 	Relist* tle;		/* ends of this and next list */
29 	Relist* nle;
30 	int match;
31 	char *p;
32 
33 	match = 0;
34 	checkstart = j->starttype;
35 	if(mp)
36 		for(i=0; i<ms; i++) {
37 			mp[i].s.sp = 0;
38 			mp[i].e.ep = 0;
39 		}
40 	j->relist[0][0].inst = 0;
41 	j->relist[1][0].inst = 0;
42 
43 	/* Execute machine once for each character, including terminal NUL */
44 	s = j->starts;
45 	do{
46 		/* fast check for first char */
47 		if(checkstart) {
48 			switch(j->starttype) {
49 			case RUNE:
50 				p = utfrune(s, j->startchar);
51 				if(p == 0 || s == j->eol)
52 					return match;
53 				s = p;
54 				break;
55 			case BOL:
56 				if(s == bol)
57 					break;
58 				p = utfrune(s, '\n');
59 				if(p == 0 || s == j->eol)
60 					return match;
61 				s = p;
62 				break;
63 			}
64 		}
65 		r = *(uchar*)s;
66 		if(r < (Rune)Runeself)
67 			n = 1;
68 		else
69 			n = chartorune(&r, s);
70 
71 		/* switch run lists */
72 		tl = j->relist[flag];
73 		tle = j->reliste[flag];
74 		nl = j->relist[flag^=1];
75 		nle = j->reliste[flag];
76 		nl->inst = 0;
77 
78 		/* Add first instruction to current list */
79 		if(match == 0)
80 			_renewemptythread(tl, progp->startinst, s);
81 
82 		/* Execute machine until current list is empty */
83 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
84 			for(inst = tlp->inst; ; inst = inst->u2.next){
85 				switch(inst->type){
86 				case RUNE:	/* regular character */
87 					if(inst->u1.r == r){
88 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
89 							return -1;
90 					}
91 					break;
92 				case LBRA:
93 					tlp->se.m[inst->u1.subid].s.sp = s;
94 					continue;
95 				case RBRA:
96 					tlp->se.m[inst->u1.subid].e.ep = s;
97 					continue;
98 				case ANY:
99 					if(r != '\n')
100 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
101 							return -1;
102 					break;
103 				case ANYNL:
104 					if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
105 							return -1;
106 					break;
107 				case BOL:
108 					if(s == bol || *(s-1) == '\n')
109 						continue;
110 					break;
111 				case EOL:
112 					if(s == j->eol || r == 0 || r == '\n')
113 						continue;
114 					break;
115 				case CCLASS:
116 					ep = inst->u1.cp->end;
117 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
118 						if(r >= rp[0] && r <= rp[1]){
119 							if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
120 								return -1;
121 							break;
122 						}
123 					break;
124 				case NCCLASS:
125 					ep = inst->u1.cp->end;
126 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
127 						if(r >= rp[0] && r <= rp[1])
128 							break;
129 					if(rp == ep)
130 						if(_renewthread(nl, inst->u2.next, &tlp->se)==nle)
131 							return -1;
132 					break;
133 				case OR:
134 					/* evaluate right choice later */
135 					if(_renewthread(tlp, inst->u1.right, &tlp->se) == tle)
136 						return -1;
137 					/* efficiency: advance and re-evaluate */
138 					continue;
139 				case END:	/* Match! */
140 					match = 1;
141 					tlp->se.m[0].e.ep = s;
142 					if(mp != 0)
143 						_renewmatch(mp, ms, &tlp->se);
144 					break;
145 				}
146 				break;
147 			}
148 		}
149 		if(s == j->eol)
150 			break;
151 		checkstart = j->starttype && nl->inst==0;
152 		s += n;
153 	}while(r);
154 	return match;
155 }
156 
157 static int
158 regexec2(Reprog *progp,	/* program to run */
159 	char *bol,	/* string to run machine on */
160 	Resub *mp,	/* subexpression elements */
161 	int ms,		/* number of elements at mp */
162 	Reljunk *j
163 )
164 {
165 	Relist relist0[BIGLISTSIZE], relist1[BIGLISTSIZE];
166 
167 	/* mark space */
168 	j->relist[0] = relist0;
169 	j->relist[1] = relist1;
170 	j->reliste[0] = relist0 + nelem(relist0) - 2;
171 	j->reliste[1] = relist1 + nelem(relist1) - 2;
172 
173 	return regexec1(progp, bol, mp, ms, j);
174 }
175 
176 extern int
177 regexec(Reprog *progp,	/* program to run */
178 	char *bol,	/* string to run machine on */
179 	Resub *mp,	/* subexpression elements */
180 	int ms)		/* number of elements at mp */
181 {
182 	Reljunk j;
183 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
184 	int rv;
185 
186 	/*
187  	 *  use user-specified starting/ending location if specified
188 	 */
189 	j.starts = bol;
190 	j.eol = 0;
191 	if(mp && ms>0){
192 		if(mp->s.sp)
193 			j.starts = mp->s.sp;
194 		if(mp->e.ep)
195 			j.eol = mp->e.ep;
196 	}
197 	j.starttype = 0;
198 	j.startchar = 0;
199 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < (Rune)Runeself) {
200 		j.starttype = RUNE;
201 		j.startchar = progp->startinst->u1.r;
202 	}
203 	if(progp->startinst->type == BOL)
204 		j.starttype = BOL;
205 
206 	/* mark space */
207 	j.relist[0] = relist0;
208 	j.relist[1] = relist1;
209 	j.reliste[0] = relist0 + nelem(relist0) - 2;
210 	j.reliste[1] = relist1 + nelem(relist1) - 2;
211 
212 	rv = regexec1(progp, bol, mp, ms, &j);
213 	if(rv >= 0)
214 		return rv;
215 	rv = regexec2(progp, bol, mp, ms, &j);
216 	if(rv >= 0)
217 		return rv;
218 	return -1;
219 }
220